@Test
  public void testConstant() {
    // Every feature has the same value, so the fitted std dev is zero; the normalizer must still
    // produce finite values when fitting, transforming and reverting.
    double tolerancePerc = 10.0; // 10% of correct value
    int nSamples = 500;
    int nFeatures = 3;
    int constant = 100;

    INDArray featureSet = Nd4j.zeros(nSamples, nFeatures).add(constant);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    // The fitted std dev must not be NaN
    assertFalse(Double.isNaN(myNormalizer.getStd().getDouble(0)));

    myNormalizer.transform(sampleDataSet);
    // Dividing by a zero std dev must not produce NaNs
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    // Transformed values must be close enough to zero (JUnit order: expected, actual, delta)
    assertEquals(
        0.0,
        Transforms.abs(sampleDataSet.getFeatures()).max(0, 1).getDouble(0, 0),
        constant * tolerancePerc / 100.0);

    myNormalizer.revert(sampleDataSet);
    // Reverting with a zero std dev must not produce NaNs either
    assertFalse(Double.isNaN(sampleDataSet.getFeatures().min(0, 1).getDouble(0)));
    // NOTE(review): this takes the *minimum* absolute difference, which only proves that at least
    // one element was restored; max(0, 1) would be the stricter check - confirm before tightening.
    assertEquals(
        0.0,
        Transforms.abs(sampleDataSet.getFeatures().sub(featureSet)).min(0, 1).getDouble(0),
        constant * tolerancePerc / 100.0);
  }
Beispiel #2
0
  /**
   * Computes the similarity between the vector stored for {@code word} at index {@code k1} and the
   * vector stored for {@code word2} at index {@code k2}.
   *
   * @param word the first word
   * @param k1 the vector index for the first word
   * @param word2 the second word
   * @param k2 the vector index for the second word
   * @return 1.0 when both word and index are identical, -1 when an index is greater than {@code K}
   *     or a vector is missing, otherwise the dot product of the two unit vectors
   */
  public double similarity(String word, int k1, String word2, int k2) {
    if (k1 > K || k2 > K) return -1;

    if (word.equals(word2) && k1 == k2) return 1.0;

    // Check for missing vectors BEFORE normalising: unitVec cannot be applied to null.
    INDArray raw = getWordVectorMatrix(word, k1);
    INDArray raw2 = getWordVectorMatrix(word2, k2);
    if (raw == null || raw2 == null) return -1;

    INDArray vector = Transforms.unitVec(raw);
    INDArray vector2 = Transforms.unitVec(raw2);
    return Nd4j.getBlasWrapper().dot(vector, vector2);
  }
 /**
  * Rescales {@code gradient} in place using the two running averages kept by this object:
  * {@code adadeltaRMSGradient} (decayed average of squared gradients) and {@code adadeltaRMSUpdate}
  * (decayed average of squared rescaled updates).
  *
  * @param gradient the gradient to rescale; modified in place
  */
 protected void getAdadeltaGradient(INDArray gradient) {
   // Accumulate the squared raw gradient.
   adadeltaRMSGradient =
       adadeltaRMSGradient
           .mul(adadeltaMomentum)
           .add(gradient.mul(gradient).mul(1 - adadeltaMomentum));
   // Scale by sqrt(avgUpdate + eps) / sqrt(avgGradient + eps).
   gradient.muli(
       Transforms.sqrt(adadeltaRMSUpdate.add(adadeltaEps))
           .div(Transforms.sqrt(adadeltaRMSGradient.add(adadeltaEps))));
   // mul/add return new arrays, so the result has to be stored back; previously it was computed
   // and discarded, leaving the update accumulator stuck at its initial value.
   adadeltaRMSUpdate =
       adadeltaRMSUpdate
           .mul(adadeltaMomentum)
           .add(gradient.mul(gradient).mul(1 - adadeltaMomentum));
 }
Beispiel #4
0
 /**
  * Computes the L1 penalty for this layer: the configured L1 coefficient multiplied by the summed
  * absolute values of the recurrent weights and the input weights.
  *
  * @return 0.0 when regularization is disabled or the L1 coefficient is not positive, otherwise
  *     the weighted L1 norm
  */
 @Override
 public double calcL1() {
   if (!conf.isUseRegularization() || conf.getL1() <= 0.0) {
     return 0.0;
   }

   INDArray recurrentAbs = Transforms.abs(getParam(GRUParamInitializer.RECURRENT_WEIGHT_KEY));
   double recurrentSum = recurrentAbs.sum(Integer.MAX_VALUE).getDouble(0);

   INDArray inputAbs = Transforms.abs(getParam(GRUParamInitializer.INPUT_WEIGHT_KEY));
   double inputSum = inputAbs.sum(Integer.MAX_VALUE).getDouble(0);

   double absWeightTotal = recurrentSum + inputSum;
   return conf.getL1() * absWeightTotal;
 }
  @Test
  public void testBruteForce() {
    /* This test creates a dataset where feature values are multiples of consecutive natural numbers.
      The fitted mean and std dev are compared to the theoretical mean and std dev.
    */
    double tolerancePerc = 0.01; // 0.01% of correct value
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;

    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1).mul(x);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    double meanNaturalNums = (nSamples + 1) / 2.0;
    INDArray theoreticalMean =
        Nd4j.create(new double[] {meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z});
    double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray theoreticalStd =
        Nd4j.create(new double[] {stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z});

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    double maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);

    // Compare the fitted std dev with the theoretical std dev. This used to repeat the mean
    // comparison, so the std dev was never actually verified.
    INDArray stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    INDArray stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    double maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);

    // SAME TEST WITH THE ITERATOR
    int bSize = 10;
    tolerancePerc = 1; // 1% of correct value
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);

    meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);

    stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
  }
  /**
   * Finds the words whose vectors are most similar to the given vector.
   *
   * <p>With an {@link InMemoryLookupTable}, candidates are pre-selected with a single matrix
   * product against the row-normalised weight matrix and then re-scored with an exact cosine
   * similarity. The weight matrix is normalised in place the first time this path runs. For any
   * other lookup table every vocabulary word is scored individually.
   *
   * @param words the query vector
   * @param top the number of words to return
   * @return the words nearest to the query vector
   */
  @Override
  public Collection<String> wordsNearest(INDArray words, int top) {
    if (lookupTable instanceof InMemoryLookupTable) {
      InMemoryLookupTable l = (InMemoryLookupTable) lookupTable;

      INDArray syn0 = l.getSyn0();

      if (!normalized) {
        synchronized (this) {
          if (!normalized) {
            // The dot product with a unit query equals the cosine only when every row has unit
            // L2 length, so rows are divided by their L2 norm (previously the L1 norm).
            syn0.diviColumnVector(syn0.norm2(1));
            normalized = true;
          }
        }
      }

      INDArray similarity = Transforms.unitVec(words).mmul(syn0.transpose());

      // Over-fetch so that filtered entries (null/UNK/STOP) do not starve the result.
      List<Double> highToLowSimList = getTopN(similarity, top + 20);

      List<WordSimilarity> result = new ArrayList<>();

      for (int i = 0; i < highToLowSimList.size(); i++) {
        String word = vocabCache.wordAtIndex(highToLowSimList.get(i).intValue());
        if (word != null && !word.equals("UNK") && !word.equals("STOP")) {
          INDArray otherVec = lookupTable.vector(word);
          double sim = Transforms.cosineSim(words, otherVec);

          result.add(new WordSimilarity(word, sim));
        }
      }

      Collections.sort(result, new SimilarityComparator());

      return getLabels(result, top);
    }

    // Fallback: score every word in the vocabulary.
    Counter<String> distances = new Counter<>();

    for (String s : vocabCache.words()) {
      INDArray otherVec = lookupTable.vector(s);
      double sim = Transforms.cosineSim(words, otherVec);
      distances.incrementCount(s, sim);
    }

    distances.keepTopNKeys(top);
    return distances.keySet();
  }
  @Test
  public void testUnderOverflow() {
    // This dataset will be basically constant with a small std deviation
    // And the constant is large. Checking if algorithm can handle
    double tolerancePerc = 1; // Within 1 %
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;

    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

    INDArray theoreticalMean = Nd4j.create(new double[] {x, y, z});

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    // Divide by the magnitude of the expected mean: x is negative, and dividing by a signed
    // value made its percentage negative, so that feature could never fail the check.
    INDArray meanDeltaPerc = meanDelta.mul(100).div(Transforms.abs(theoreticalMean));
    assertTrue(meanDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);

    // this just has to not barf
    // myNormalizer.transform(sampleIter);
    myNormalizer.transform(sampleDataSet);
  }
  @Test
  public void testAdamUpdater() {
    // Hyper-parameters handed to the configuration and reused for the expected values below.
    final double lr = 0.01;
    final int iteration = 0;
    final double beta1 = 0.8;
    final double beta2 = 0.888;

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .iterations(iteration)
            .adamMeanDecay(beta1)
            .adamVarDecay(beta2)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.ADAM)
                    .build())
            .build();

    // Build the layer and give the updater a state view of the size it asks for.
    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, iteration, 1);

    // Step size used for the expected values; falls back to epsilon when it is NaN or zero.
    double beta1t = FastMath.pow(beta1, iteration);
    double beta2t = FastMath.pow(beta2, iteration);
    double alphat = lr * FastMath.sqrt(1 - beta2t) / (1 - beta1t);
    if (Double.isNaN(alphat) || alphat == 0.0) {
      alphat = epsilon;
    }

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
      val = entry.getValue();
      INDArray firstMoment = Nd4j.zeros(val.shape());
      INDArray secondMoment = Nd4j.zeros(val.shape());

      firstMoment.muli(beta1).addi(val.mul(1.0 - beta1));
      secondMoment.muli(beta2).addi(val.mul(val).mul(1.0 - beta2));
      gradExpected = firstMoment.mul(alphat).divi(Transforms.sqrt(secondMoment).addi(epsilon));

      // Dump both arrays on mismatch so the failure is easier to diagnose.
      boolean matches = gradExpected.equals(gradient.getGradientFor(entry.getKey()));
      if (!matches) {
        System.out.println(Arrays.toString(gradExpected.dup().data().asFloat()));
        System.out.println(
            Arrays.toString(gradient.getGradientFor(entry.getKey()).dup().data().asFloat()));
      }
      assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }

    assertEquals(beta1, layer.conf().getLayer().getAdamMeanDecay(), 1e-4);
    assertEquals(beta2, layer.conf().getLayer().getAdamVarDecay(), 1e-4);
  }
  @Test
  public void testWordsNearestBasic1() throws Exception {

    // WordVectors vec = WordVectorSerializer.loadTxtVectors(new
    // File("/ext/Temp/Models/model.dat_trans"));
    vec.setModelUtils(new BasicModelUtils<VocabWord>());

    String target = "energy";

    // Snapshot of the target vector before any nearest-word query is run.
    INDArray before = vec.getWordVectorMatrix(target).dup();

    System.out.println("[-]: " + before);
    System.out.println("[+]: " + Transforms.unitVec(before));

    // Run the same query three times, logging each round under its own heading.
    String[] headings = {
      "Transpose model results:", "Transpose model results 2:", "Transpose model results 3:"
    };
    for (String heading : headings) {
      Collection<String> nearest = vec.wordsNearest(target, 10);
      log.info(heading);
      printWords(target, nearest, vec);
    }

    // Snapshot taken after the queries, compared against the first one.
    INDArray after = vec.getWordVectorMatrix(target).dup();

    assertEquals(before, after);
  }
  /**
   * Returns the similarity of 2 words. Result value will be in range [-1,1], where -1.0 is exact
   * opposite similarity, i.e. NO similarity, and 1.0 is total match of two word vectors. However,
   * most of the time you'll see values in range [0,1], but that depends on the training corpus.
   *
   * <p>Returns NaN if either label is null or has no vector in the lookup table.
   *
   * @param label1 the first word
   * @param label2 the second word
   * @return a normalized similarity (cosine similarity)
   */
  @Override
  public double similarity(String label1, String label2) {
    if (label1 == null || label2 == null) {
      log.debug(
          "LABELS: "
              + label1
              + ": "
              + (label1 == null ? "null" : EXISTS)
              + ";"
              + label2
              + " vec2:"
              + (label2 == null ? "null" : EXISTS));
      return Double.NaN;
    }

    // Look the vectors up first and copy them only after the null check; calling dup() directly
    // on the lookup result threw a NullPointerException for unknown labels.
    INDArray stored1 = lookupTable.vector(label1);
    INDArray stored2 = lookupTable.vector(label2);

    if (stored1 == null || stored2 == null) {
      log.debug(
          label1
              + ": "
              + (stored1 == null ? "null" : EXISTS)
              + ";"
              + label2
              + " vec2:"
              + (stored2 == null ? "null" : EXISTS));
      return Double.NaN;
    }

    if (label1.equals(label2)) return 1.0;

    // Work on copies, as before, so the stored vectors are never handed to cosineSim directly.
    INDArray vec1 = stored1.dup();
    INDArray vec2 = stored2.dup();
    return Transforms.cosineSim(vec1, vec2);
  }
  /**
   * Get the top n words most similar to the given word.
   *
   * @param word the word to compare
   * @param n the n to get
   * @return at most n words, excluding {@code word} itself; an empty list when the word has no
   *     vector
   */
  public Collection<String> wordsNearestSum(String word, int n) {
    INDArray wordVector = this.getWordVectorMatrix(word);
    // Without a vector there is nothing to compare, and unitVec cannot be applied to null.
    if (wordVector == null) return new ArrayList<>();
    INDArray vec = Transforms.unitVec(wordVector);

    if (lookupTable() instanceof InMemoryLookupTable) {
      InMemoryLookupTable l = (InMemoryLookupTable) lookupTable();
      INDArray syn0 = l.getSyn0();
      INDArray weights = syn0.norm2(0).rdivi(1).muli(vec);
      INDArray distances = syn0.mulRowVector(weights).sum(1);
      INDArray[] sorted = Nd4j.sortWithIndices(distances, 0, false);
      INDArray sort = sorted[0];
      List<String> ret = new ArrayList<>();
      SequenceElement word2 = vocab().wordFor(word);
      // -1 never matches a sorted index, so a word missing from the vocab is simply not skipped.
      int selfIndex = word2 == null ? -1 : word2.getIndex();
      if (n > sort.length()) n = sort.length();
      // One extra candidate is scanned because the query word itself is expected among the hits,
      // but the scan must not run past the end of the sorted indices.
      int limit = Math.min(n + 1, sort.length());
      for (int i = 0; i < limit && ret.size() < n; i++) {
        int index = sort.getInt(i);
        if (index == selfIndex) continue;
        String add = vocab().wordAtIndex(index);
        if (add == null || add.equals("UNK") || add.equals("STOP")) {
          continue;
        }

        ret.add(add);
      }

      return ret;
    }

    // Fallback: score every other vocabulary word individually.
    Counter<String> distances = new Counter<>();

    for (String s : vocab().words()) {
      if (s.equals(word)) continue;
      INDArray otherVec = getWordVectorMatrix(s);
      double sim = Transforms.cosineSim(vec, otherVec);
      distances.incrementCount(s, sim);
    }

    distances.keepTopNKeys(n);
    return distances.keySet();
  }
  /**
   * Words nearest based on positive and negative words. The query vector is the sum of the
   * positive word vectors minus the sum of the negative word vectors.
   *
   * @param positive the positive words
   * @param negative the negative words
   * @param top the top n words
   * @return up to {@code top} words nearest the combined vector, never including any of the input
   *     words
   */
  public Collection<String> wordsNearestSum(
      Collection<String> positive, Collection<String> negative, int top) {
    INDArray words = Nd4j.create(lookupTable().layerSize());
    Set<String> union = SetUtils.union(new HashSet<>(positive), new HashSet<>(negative));
    for (String s : positive) words.addi(lookupTable().vector(s));

    for (String s : negative) words.addi(lookupTable().vector(s).mul(-1));

    if (lookupTable() instanceof InMemoryLookupTable) {
      InMemoryLookupTable l = (InMemoryLookupTable) lookupTable();
      INDArray syn0 = l.getSyn0();
      INDArray weights = syn0.norm2(0).rdivi(1).muli(words);
      INDArray distances = syn0.mulRowVector(weights).sum(1);
      INDArray[] sorted = Nd4j.sortWithIndices(distances, 0, false);
      INDArray sort = sorted[0];
      List<String> ret = new ArrayList<>();
      // Walk the sorted indices until `top` acceptable words are collected or the list is
      // exhausted. The previous "end++ / break" bookkeeping aborted the whole scan as soon as a
      // skipped word pushed the bound up to the array length.
      int available = sort.length();
      for (int i = 0; i < available && ret.size() < top; i++) {
        String candidate = vocab().wordAtIndex(sort.getInt(i));
        if (candidate == null
            || candidate.equals("UNK")
            || candidate.equals("STOP")
            || union.contains(candidate)) {
          continue;
        }

        ret.add(candidate);
      }

      return ret;
    }

    // Fallback: score every vocabulary word individually.
    Counter<String> distances = new Counter<>();

    for (String s : vocab().words()) {
      INDArray otherVec = getWordVectorMatrix(s);
      double sim = Transforms.cosineSim(words, otherVec);
      distances.incrementCount(s, sim);
    }

    distances.keepTopNKeys(top);
    return distances.keySet();
  }
  /**
   * Reads a Word2Vec model from a binary file. The stream is wrapped in a {@link GZIPInputStream}
   * when the file name looks compressed. The first two strings in the stream are parsed as the
   * word count and the vector length; each following entry is a word followed by its float
   * components. Entries whose word is empty are skipped.
   *
   * @param modelFile the file to read
   * @return a Word2Vec instance holding the loaded vocabulary and lookup table
   * @throws NumberFormatException if the word count or vector length cannot be parsed
   * @throws IOException if the file cannot be opened or read
   */
  private static Word2Vec readBinaryModel(File modelFile)
      throws NumberFormatException, IOException {
    InMemoryLookupTable table;
    VocabCache vocabulary;
    INDArray weights;
    try (BufferedInputStream buffered =
            new BufferedInputStream(
                GzipUtils.isCompressedFilename(modelFile.getName())
                    ? new GZIPInputStream(new FileInputStream(modelFile))
                    : new FileInputStream(modelFile));
        DataInputStream in = new DataInputStream(buffered)) {
      int wordCount = Integer.parseInt(readString(in));
      int vectorLength = Integer.parseInt(readString(in));
      weights = Nd4j.create(wordCount, vectorLength);
      vocabulary = new InMemoryLookupCache(false);
      table =
          (InMemoryLookupTable)
              new InMemoryLookupTable.Builder()
                  .cache(vocabulary)
                  .vectorLength(vectorLength)
                  .build();

      int row = 0;
      while (row < wordCount) {
        String token = readString(in);
        log.trace("Loading " + token + " with word " + row);

        if (!token.isEmpty()) {
          float[] components = new float[vectorLength];
          for (int col = 0; col < vectorLength; col++) {
            components[col] = readFloat(in);
          }

          // Each row is stored as a unit vector.
          weights.putRow(row, Transforms.unitVec(Nd4j.create(components)));

          vocabulary.addWordToIndex(vocabulary.numWords(), token);
          vocabulary.addToken(new VocabWord(1, token));
          vocabulary.putVocabWord(token);
        }
        row++;
      }
    }

    table.setSyn0(weights);

    Word2Vec model = new Word2Vec();
    model.setVocab(vocabulary);
    model.setLookupTable(table);
    return model;
  }
  @Test
  public void testRMSPropUpdater() {
    // Hyper-parameters handed to the configuration and reused for the expected values below.
    final double lr = 0.01;
    final double rmsDecay = 0.25;
    Map<String, INDArray> lastG = new HashMap<>();

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .rmsDecay(rmsDecay)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.RMSPROP)
                    .build())
            .build();

    // Build the layer and give the updater a state view of the size it asks for.
    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, -1, 1);

    // Expected values are computed from untouched copies of the raw gradients.
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
      key = entry.getKey();
      val = entry.getValue();

      // Running average of squared gradients for this key; starts from zeros when absent.
      INDArray squaredAvg = lastG.get(key);
      if (squaredAvg == null) {
        squaredAvg = Nd4j.zeros(val.shape());
      }

      squaredAvg.muli(rmsDecay).addi(val.mul(val).muli(1 - rmsDecay));
      gradExpected = val.mul(lr).div(Transforms.sqrt(squaredAvg.add(Nd4j.EPS_THRESHOLD)));

      assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
      lastG.put(key, squaredAvg);
    }
    assertEquals(rmsDecay, layer.conf().getLayer().getRmsDecay(), 1e-4);
  }
Beispiel #15
0
 /**
  * Applies regularization and scaling to a gradient, in place. In order: adds the L2 term, adds
  * the L1 term (both skipped for the bias parameter and when regularization is off), divides by
  * the mini-batch size when mini-batching is enabled, and divides by the gradient's L2 norm when
  * unit-norm constraining is enabled.
  *
  * @param layer the layer whose configuration and parameters are used
  * @param gradient the gradient to adjust; modified in place
  * @param param the key of the parameter the gradient belongs to
  */
 public void postApply(Layer layer, INDArray gradient, String param) {
   NeuralNetConfiguration conf = layer.conf();
   INDArray params = layer.getParam(param);

   if (conf.isUseRegularization()) {
     if (conf.getLayer().getL2() > 0 && !(param.equals(DefaultParamInitializer.BIAS_KEY))) {
       // dC/dw = dC0/dw + lambda/n * w where C0 is pre-l2 cost function
       INDArray l2Term = params.mul(conf.getLayer().getL2());
       gradient.addi(l2Term);
     }
     if (conf.getLayer().getL1() > 0 && !(param.equals(DefaultParamInitializer.BIAS_KEY))) {
       INDArray l1Term = Transforms.sign(params).muli(conf.getLayer().getL1());
       gradient.addi(l1Term);
     }
   }

   if (conf.isMiniBatch()) {
     gradient.divi(layer.getInputMiniBatchSize());
   }
   if (conf.isConstrainGradientToUnitNorm()) {
     gradient.divi(gradient.norm2(Integer.MAX_VALUE));
   }
 }
  /**
   * Reads a Word2Vec model from a text file. The first line holds the word count and the vector
   * length separated by a space; every following line holds a word followed by its
   * space-separated float components. Lines whose first token is empty are skipped.
   *
   * @param modelFile the file to read
   * @return a Word2Vec instance holding the loaded vocabulary and lookup table
   * @throws IOException if the file cannot be read or contains no header line
   * @throws NumberFormatException if the header or a vector component cannot be parsed
   */
  private static Word2Vec readTextModel(File modelFile) throws IOException, NumberFormatException {
    InMemoryLookupTable lookupTable;
    VocabCache cache;
    INDArray syn0;
    int layerSize;
    // try-with-resources so the reader is closed even when parsing fails part-way through.
    try (BufferedReader reader = new BufferedReader(new FileReader(modelFile))) {
      String line = reader.readLine();
      if (line == null) {
        throw new IOException("Model file has no header line: " + modelFile);
      }
      String[] initial = line.split(" ");
      int words = Integer.parseInt(initial[0]);
      layerSize = Integer.parseInt(initial[1]);
      syn0 = Nd4j.create(words, layerSize);

      cache = new InMemoryLookupCache();

      int currLine = 0;
      while ((line = reader.readLine()) != null) {
        String[] split = line.split(" ");
        String word = split[0];

        if (word.isEmpty()) {
          continue;
        }

        float[] vector = new float[split.length - 1];
        for (int i = 1; i < split.length; i++) {
          vector[i - 1] = Float.parseFloat(split[i]);
        }

        syn0.putRow(currLine, Transforms.unitVec(Nd4j.create(vector)));
        // Advance to the next row; without this every vector overwrote row 0.
        currLine++;

        cache.addWordToIndex(cache.numWords(), word);
        cache.addToken(new VocabWord(1, word));
        cache.putVocabWord(word);
      }
    }

    lookupTable =
        (InMemoryLookupTable)
            new InMemoryLookupTable.Builder().cache(cache).vectorLength(layerSize).build();
    lookupTable.setSyn0(syn0);

    Word2Vec ret = new Word2Vec();
    ret.setVocab(cache);
    ret.setLookupTable(lookupTable);
    return ret;
  }
  /**
   * Finds the words whose vectors are nearest to the given vector.
   *
   * @param words the query vector
   * @param top the top n words
   * @return up to {@code top} words nearest the query vector
   */
  @Override
  public Collection<String> wordsNearest(INDArray words, int top) {
    if (lookupTable() instanceof InMemoryLookupTable) {
      InMemoryLookupTable l = (InMemoryLookupTable) lookupTable();
      INDArray syn0 = l.getSyn0();
      INDArray weights = syn0.norm2(0).rdivi(1).muli(words);
      INDArray distances = syn0.mulRowVector(weights).mean(1);
      INDArray[] sorted = Nd4j.sortWithIndices(distances, 0, false);
      INDArray sort = sorted[0];
      List<String> ret = new ArrayList<>();
      VocabCache vocabCache = vocab();
      // Walk the sorted indices until `top` acceptable words are collected or the list is
      // exhausted. The previous "end++ / break" bookkeeping aborted the whole scan as soon as a
      // skipped word pushed the bound up to the array length.
      int available = sort.length();
      for (int i = 0; i < available && ret.size() < top; i++) {
        // Single-index access, matching how the sibling wordsNearestSum methods read the same
        // kind of sorted index array.
        String add = vocabCache.wordAtIndex(sort.getInt(i));
        if (add == null || add.equals("UNK") || add.equals("STOP")) {
          continue;
        }

        ret.add(add);
      }

      return ret;
    }

    // Fallback: score every vocabulary word individually.
    Counter<String> distances = new Counter<>();

    for (String s : vocab().words()) {
      INDArray otherVec = getWordVectorMatrix(s);
      double sim = Transforms.cosineSim(words, otherVec);
      distances.incrementCount(s, sim);
    }

    distances.keepTopNKeys(top);
    return distances.keySet();
  }
  /**
   * Get the top n words most similar to the given word, by scoring every other vocabulary word
   * with cosine similarity.
   *
   * @param word the word to compare
   * @param n the n to get
   * @return the most similar words, or an empty list when the word is not in the vocabulary
   */
  public Collection<String> wordsNearest(String word, int n) {
    /*
       TODO: This is temporary solution and we should get rid of flat array scan. Probably, after VPTree implementation gets fixed
    */
    if (!vocab.hasToken(word)) {
      return new ArrayList<>();
    }

    INDArray queryVector = getWordVectorMatrix(word);
    Counter<String> scores = new Counter<>();

    for (String candidate : vocab().words()) {
      // The query word itself is never a candidate.
      if (!candidate.equals(word)) {
        INDArray candidateVector = getWordVectorMatrix(candidate);
        scores.incrementCount(candidate, Transforms.cosineSim(queryVector, candidateVector));
      }
    }

    // NOTE(review): n - 1 is passed here although the query word is already excluded above;
    // confirm against Counter.keepTopNKeys how many keys it actually retains.
    scores.keepTopNKeys(n - 1);
    return scores.keySet();
    //        return wordsNearest(Arrays.asList(word),new ArrayList<String>(),n);
  }
Beispiel #19
0
  /**
   * Finds the entries nearest to the vector stored for {@code word} at index {@code k}. Each
   * result is formatted as {@code word(i)}, where the word comes from the flat index modulo the
   * vocabulary size and {@code i} is the flat index divided by the vocabulary size.
   *
   * @param word the word to compare
   * @param k the vector index for the word
   * @param n the number of candidates requested from {@code getTopN}
   * @return the formatted nearest entries; empty when the word has no vector at index {@code k}
   */
  public Collection<String> wordsNearest(String word, int k, int n) {
    INDArray wordVector = getWordVectorMatrix(word, k);
    // unitVec cannot be applied to a missing vector.
    if (wordVector == null) return new ArrayList<>();

    INDArray vector = Transforms.unitVec(wordVector);
    INDArray similarity = vector.mmul(syn0.transpose());
    List<Double> highToLowSimList = getTopN(similarity, n);
    List<String> ret = new ArrayList<>();

    // NOTE(review): the first hit is skipped, presumably because it is the query itself -
    // confirm against getTopN.
    for (int i = 1; i < highToLowSimList.size(); i++) {
      int flatIndex = highToLowSimList.get(i).intValue();
      String base = vocab.wordAtIndex(flatIndex % vocab.numWords());
      // Filter on the bare word. The check used to run after the "(i)" suffix had been appended,
      // so null, UNK and STOP entries were never filtered out.
      if (base == null || base.equals("UNK") || base.equals("STOP")) {
        continue;
      }

      ret.add(base + "(" + flatIndex / vocab.numWords() + ")");
      if (ret.size() >= n) {
        break;
      }
    }

    return ret;
  }
  @Test
  public void testRevert() {
    // transform followed by revert must reproduce the original features within the tolerance.
    double tolerancePerc = 0.01; // 0.01% of correct value
    int nSamples = 500;
    int nFeatures = 3;

    INDArray featureSet = Nd4j.randn(nSamples, nFeatures);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    DataSet transformed = sampleDataSet.copy();
    myNormalizer.transform(transformed);
    // System.out.println(transformed.getFeatures());
    myNormalizer.revert(transformed);
    // System.out.println(transformed.getFeatures());
    // Take the absolute value of the whole ratio: the features are signed, and dividing an
    // absolute difference by a negative feature gave a negative error that max() never picked up.
    INDArray delta =
        Transforms.abs(
            transformed
                .getFeatures()
                .sub(sampleDataSet.getFeatures())
                .div(sampleDataSet.getFeatures()));
    double maxdeltaPerc = delta.max(0, 1).mul(100).getDouble(0, 0);
    assertTrue(maxdeltaPerc < tolerancePerc);
  }
  @Test
  public void testAdaGradUpdater() {
    final double lr = 1e-2;

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD)
                    .build())
            .build();

    // Build the layer and give the updater a state view of the size it asks for.
    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    updater.update(layer, gradient, -1, 1);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);

    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
      val = entry.getValue();
      // Expected value: val * lr / sqrt(val^2 + epsilon)
      INDArray denominator = Transforms.sqrt(val.mul(val).add(epsilon));
      gradExpected = denominator.rdiv(lr).mul(val);
      assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
  }
  /**
   * Returns the similarity of 2 words. Result value will be in range [-1,1], where -1.0 is exact
   * opposite similarity, i.e. NO similarity, and 1.0 is total match of two word vectors. However,
   * most of the time you'll see values in range [0,1], but that depends on the training corpus.
   *
   * @param word the first word
   * @param word2 the second word
   * @return 1.0 when the words are equal, -1 when either word has no vector, otherwise the cosine
   *     similarity of the two vectors
   */
  public double similarity(String word, String word2) {
    if (word.equals(word2)) return 1.0;

    // Look each vector up once; previously both were fetched twice (null check, then compute).
    INDArray vector = getWordVectorMatrix(word);
    if (vector == null) return -1;
    INDArray vector2 = getWordVectorMatrix(word2);
    if (vector2 == null) return -1;

    return Transforms.cosineSim(vector, vector2);
  }
  /**
   * Words nearest based on positive and negative words. The query vector is the mean of the
   * positive word vectors and the negated negative word vectors.
   *
   * @param positive the positive words
   * @param negative the negative words
   * @param top the top n words
   * @return the words nearest the mean of the words, never including any of the input words;
   *     empty when any input word is missing from the vocabulary
   */
  @Override
  public Collection<String> wordsNearest(
      Collection<String> positive, Collection<String> negative, int top) {
    // Built once and used both for the vocabulary check and for filtering results.
    Set<String> union = SetUtils.union(new HashSet<>(positive), new HashSet<>(negative));

    // Check every word is in the model
    for (String p : union) {
      if (!vocab().containsWord(p)) {
        return new ArrayList<>();
      }
    }

    WeightLookupTable weightLookupTable = lookupTable();
    INDArray words = Nd4j.create(positive.size() + negative.size(), weightLookupTable.layerSize());
    int row = 0;
    for (String s : positive) {
      words.putRow(row++, weightLookupTable.vector(s));
    }

    for (String s : negative) {
      words.putRow(row++, weightLookupTable.vector(s).mul(-1));
    }

    INDArray mean = words.isMatrix() ? words.mean(0) : words;
    // TODO this should probably be replaced with wordsNearest(mean, top)
    if (weightLookupTable instanceof InMemoryLookupTable) {
      InMemoryLookupTable l = (InMemoryLookupTable) weightLookupTable;

      INDArray syn0 = l.getSyn0();
      // NOTE(review): this divides the shared weight matrix in place on every call, using norms
      // taken along dimension 0. Confirm whether a one-time per-row L2 normalisation was
      // intended before changing it.
      syn0.diviRowVector(syn0.norm2(0));

      INDArray similarity = Transforms.unitVec(mean).mmul(syn0.transpose());
      // We assume that syn0 is normalized.
      // Hence, the following division is not needed anymore.
      // distances.diviRowVector(distances.norm2(1));
      // INDArray[] sorted = Nd4j.sortWithIndices(distances,0,false);
      // Over-fetch by the number of input words, since those are filtered out below.
      List<Double> highToLowSimList = getTopN(similarity, top + union.size());
      List<String> ret = new ArrayList<>();

      for (int i = 0; i < highToLowSimList.size(); i++) {
        String word = vocab().wordAtIndex(highToLowSimList.get(i).intValue());
        if (word != null && !word.equals("UNK") && !word.equals("STOP") && !union.contains(word)) {
          ret.add(word);
          if (ret.size() >= top) {
            break;
          }
        }
      }

      return ret;
    }

    // Fallback: score every vocabulary word individually.
    Counter<String> distances = new Counter<>();

    for (String s : vocab().words()) {
      INDArray otherVec = getWordVectorMatrix(s);
      double sim = Transforms.cosineSim(mean, otherVec);
      distances.incrementCount(s, sim);
    }

    distances.keepTopNKeys(top);
    return distances.keySet();
  }
Beispiel #24
0
  /**
   * Recursively walks {@code tree} from this node downwards, accumulating classification,
   * transform and word-vector derivatives into the supplied maps and recording each visited
   * node's weighted error on the node itself via {@code tree.setError}.
   *
   * <p>Leaves are skipped. A pre-terminal node updates {@code unaryCD} and {@code wordVectorD};
   * any other node is treated as binary: it updates {@code binaryTD}, either {@code unaryCD}
   * (under the empty key, when {@code combineClassification} is set) or {@code binaryCD}, and
   * optionally {@code binaryINDArrayTD}, then recurses into both children.
   *
   * <p>Every accumulator is read with {@code get(...)} before being written back, so an entry
   * must already exist for each key this method touches.
   *
   * @param tree the node to process
   * @param binaryTD accumulator for binary transform derivatives, keyed by (left, right) category
   * @param binaryCD accumulator for binary classification derivatives, keyed by (left, right)
   *     category; only written when {@code combineClassification} is false
   * @param binaryINDArrayTD accumulator for tensor derivatives, keyed by (left, right) category;
   *     only written when {@code useDoubleTensors} is true
   * @param unaryCD accumulator for unary classification derivatives, keyed by category
   * @param wordVectorD accumulator for word-vector derivatives, keyed by vocabulary word
   * @param deltaUp the delta handed down from the parent node; added to this node's own
   *     classification delta
   */
  private void backpropDerivativesAndError(
      Tree tree,
      MultiDimensionalMap<String, String, INDArray> binaryTD,
      MultiDimensionalMap<String, String, INDArray> binaryCD,
      MultiDimensionalMap<String, String, INDArray> binaryINDArrayTD,
      Map<String, INDArray> unaryCD,
      Map<String, INDArray> wordVectorD,
      INDArray deltaUp) {
    if (tree.isLeaf()) {
      return;
    }

    INDArray currentVector = tree.vector();
    String category = basicCategory(tree.label());

    // Build a vector that looks like 0,0,1,0,0 with an indicator for the correct class
    INDArray goldLabel = Nd4j.create(numOuts, 1);
    int goldClass = tree.goldLabel();
    if (goldClass >= 0) {
      // Valid indices are 0 .. numOuts - 1; goldClass == numOuts is already out of range.
      assert goldClass < numOuts
          : "Tried adding a label that was >= to the number of configured outputs "
              + numOuts
              + " with label "
              + goldClass;
      goldLabel.putScalar(goldClass, 1.0f);
    }

    // Classes without a configured weight count with weight 1.
    Double nodeWeight = classWeights.get(goldClass);
    if (nodeWeight == null) {
      nodeWeight = 1.0;
    }
    INDArray predictions = tree.prediction();

    // If this is an unlabeled class, transform deltaClass to 0.  We could
    // make this more efficient by eliminating various of the below
    // calculations, but this would be the easiest way to handle the
    // unlabeled class
    INDArray deltaClass;
    if (goldClass < 0) {
      deltaClass = Nd4j.create(predictions.rows(), predictions.columns());
    } else if (predictions.data().dataType() == DataBuffer.Type.DOUBLE) {
      deltaClass = Nd4j.getBlasWrapper().scal(nodeWeight, predictions.sub(goldLabel));
    } else {
      // Non-double buffers are scaled with the float overload.
      deltaClass =
          Nd4j.getBlasWrapper().scal((float) nodeWeight.doubleValue(), predictions.sub(goldLabel));
    }
    INDArray localCD = deltaClass.mmul(Nd4j.appendBias(currentVector).transpose());

    // Weighted negative log of the prediction at the gold class (all zero when unlabeled).
    double error =
        -(Transforms.log(predictions).muli(goldLabel).sum(Integer.MAX_VALUE).getDouble(0));
    error = error * nodeWeight;
    tree.setError(error);

    if (tree.isPreTerminal()) { // below us is a word vector
      unaryCD.put(category, unaryCD.get(category).add(localCD));

      String word = tree.children().get(0).label();
      word = getVocabWord(word);

      // NOTE(review): this executes the transform named by activationFunction on the node
      // vector; confirm it yields the derivative (not the activation itself) and that it does
      // not overwrite the node vector in place.
      INDArray currentVectorDerivative =
          Nd4j.getExecutioner()
              .execAndReturn(
                  Nd4j.getOpFactory().createTransform(activationFunction, currentVector));
      INDArray deltaFromClass = getUnaryClassification(category).transpose().mmul(deltaClass);
      // Keep only the first numHidden rows before the element-wise product.
      deltaFromClass =
          deltaFromClass.get(interval(0, numHidden), interval(0, 1)).mul(currentVectorDerivative);
      INDArray deltaFull = deltaFromClass.add(deltaUp);
      INDArray wordVector = wordVectorD.get(word);
      wordVectorD.put(word, wordVector.add(deltaFull));

    } else {
      // Otherwise, this must be a binary node
      String leftCategory = basicCategory(tree.children().get(0).label());
      String rightCategory = basicCategory(tree.children().get(1).label());
      if (combineClassification) {
        unaryCD.put("", unaryCD.get("").add(localCD));
      } else {
        binaryCD.put(
            leftCategory, rightCategory, binaryCD.get(leftCategory, rightCategory).add(localCD));
      }

      // NOTE(review): same caveat as in the pre-terminal branch about what this transform
      // returns and whether it mutates its input.
      INDArray currentVectorDerivative =
          Nd4j.getExecutioner()
              .execAndReturn(
                  Nd4j.getOpFactory().createTransform(activationFunction, currentVector));
      INDArray deltaFromClass =
          getBinaryClassification(leftCategory, rightCategory).transpose().mmul(deltaClass);

      // Keep only the first numHidden rows; the product is applied in place on that slice.
      INDArray mult = deltaFromClass.get(interval(0, numHidden), interval(0, 1));
      deltaFromClass = mult.muli(currentVectorDerivative);
      INDArray deltaFull = deltaFromClass.add(deltaUp);

      INDArray leftVector = tree.children().get(0).vector();
      INDArray rightVector = tree.children().get(1).vector();

      INDArray childrenVector = Nd4j.appendBias(leftVector, rightVector);

      INDArray add = binaryTD.get(leftCategory, rightCategory);

      // The transform gradient must use the full delta (classification delta plus the delta
      // coming from the parent); using deltaFromClass alone drops the parent's contribution.
      INDArray W_df = deltaFull.mmul(childrenVector.transpose());
      binaryTD.put(leftCategory, rightCategory, add.add(W_df));

      INDArray deltaDown;
      if (useDoubleTensors) {
        INDArray Wt_df = getINDArrayGradient(deltaFull, leftVector, rightVector);
        binaryINDArrayTD.put(
            leftCategory,
            rightCategory,
            binaryINDArrayTD.get(leftCategory, rightCategory).add(Wt_df));
        deltaDown =
            computeINDArrayDeltaDown(
                deltaFull,
                leftVector,
                rightVector,
                getBinaryTransform(leftCategory, rightCategory),
                getBinaryINDArray(leftCategory, rightCategory));
      } else {
        deltaDown = getBinaryTransform(leftCategory, rightCategory).transpose().mmul(deltaFull);
      }

      INDArray leftDerivative =
          Nd4j.getExecutioner()
              .execAndReturn(Nd4j.getOpFactory().createTransform(activationFunction, leftVector));
      INDArray rightDerivative =
          Nd4j.getExecutioner()
              .execAndReturn(Nd4j.getOpFactory().createTransform(activationFunction, rightVector));
      // deltaDown stacks the left child's rows first, then the right child's rows.
      INDArray leftDeltaDown = deltaDown.get(interval(0, deltaFull.rows()), interval(0, 1));
      INDArray rightDeltaDown =
          deltaDown.get(interval(deltaFull.rows(), deltaFull.rows() * 2), interval(0, 1));
      backpropDerivativesAndError(
          tree.children().get(0),
          binaryTD,
          binaryCD,
          binaryINDArrayTD,
          unaryCD,
          wordVectorD,
          leftDerivative.mul(leftDeltaDown));
      backpropDerivativesAndError(
          tree.children().get(1),
          binaryTD,
          binaryCD,
          binaryINDArrayTD,
          unaryCD,
          wordVectorD,
          rightDerivative.mul(rightDeltaDown));
    }
  }
  /**
   * Applies two AdaDelta updates through the layer's updater and, after each one, compares every
   * updated gradient with a value recomputed here from locally tracked running averages.
   *
   * <p>Relies on the test-class fields {@code nIn}, {@code nOut}, {@code gradient}, {@code
   * weightGradient} and {@code biasGradient}, and writes the scratch fields {@code key}, {@code
   * val} and {@code gradExpected}.
   */
  @Test
  public void testAdaDeltaUpdate() {
    final double rho = 0.85;

    // Hand-maintained updater state, one entry per parameter key.
    Map<String, INDArray> gradSqAvgByKey = new HashMap<>();
    Map<String, INDArray> updateSqAvgByKey = new HashMap<>();

    // Untouched copies of the raw gradients, used as input for the manual computation.
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());

    NeuralNetConfiguration conf =
        new NeuralNetConfiguration.Builder()
            .rho(rho)
            .layer(
                new DenseLayer.Builder()
                    .nIn(nIn)
                    .nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA)
                    .build())
            .build();

    int numParams = LayerFactories.getFactory(conf).initializer().numParams(conf, true);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = LayerFactories.getFactory(conf).create(conf, null, 0, params, true);

    Updater updater = UpdaterCreator.getUpdater(layer);
    INDArray updaterState = Nd4j.create(1, updater.stateSizeForLayer(layer));
    updater.setStateViewArray(layer, updaterState, true);

    for (int iteration = 0; iteration < 2; iteration++) {
      updater.update(layer, gradient, iteration, 1);

      // Recompute the same update by hand, parameter by parameter.
      for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        key = entry.getKey();
        val = entry.getValue();

        INDArray gradSqAvg = gradSqAvgByKey.get(key);
        INDArray updateSqAvg = updateSqAvgByKey.get(key);
        if (gradSqAvg == null) {
          // First time this key is seen: both running averages start at zero.
          gradSqAvg = Nd4j.zeros(val.shape());
          updateSqAvg = Nd4j.zeros(val.shape());
        }

        // NOTE(review): this adds (1 - rho) to the decayed average and then multiplies by
        // val^2, rather than adding (1 - rho) * val^2 — presumably mirrors the updater under
        // test; confirm against its implementation.
        INDArray valSquared = val.mul(val);
        gradSqAvg.muli(rho);
        gradSqAvg.addi(1 - rho).muli(valSquared);

        INDArray numerator = Transforms.sqrt(updateSqAvg.add(Nd4j.EPS_THRESHOLD));
        INDArray denominator = Transforms.sqrt(gradSqAvg.add(Nd4j.EPS_THRESHOLD));
        gradExpected = numerator.divi(denominator).muli(val);

        gradientDup.setGradientFor(key, gradExpected);
        assertEquals(gradExpected, gradient.getGradientFor(key));

        // Decay the squared-update average and fold in this step's squared update.
        updateSqAvg.muli(rho);
        updateSqAvg.addi(gradExpected.mul(gradExpected).muli(1 - rho));

        gradSqAvgByKey.put(key, gradSqAvg);
        updateSqAvgByKey.put(key, updateSqAvg);
      }

      // The configured rho must survive the update untouched.
      assertEquals(rho, layer.conf().getLayer().getRho(), 1e-4);
    }
  }
  /**
   * Fits a {@code NormalizerStandardize} on generated data and checks the fitted mean and
   * standard deviation as well as the transformed batches.
   *
   * <p>The data is described as A*X + B with X drawn from a normal distribution with mean 0 and
   * std 1, so the theoretical mean is B and the theoretical std is A. A second data set built
   * with A = 1, B = 0 and the same seed serves as the expected result of the transform, and a
   * third copy of the raw data provides the pre-transform values.
   */
  @Test
  public void testTransform() {
    final long randSeed = 7139183;
    final int nFeatures = 2;
    final int nSamples = 6400;
    final int bsize = 8;
    final int a = 2;
    final int b = 10;

    final double meanTolerancePerc = 5.0; // within 5%
    final double stdTolerancePerc = 10.0; // within 10%

    genRandomDataSet normData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);
    genRandomDataSet expectedData = new genRandomDataSet(nSamples, nFeatures, 1, 0, randSeed);
    genRandomDataSet beforeTransformData =
        new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    DataSetIterator normIterator = normData.getIter(bsize);
    DataSetIterator expectedIterator = expectedData.getIter(bsize);
    DataSetIterator beforeTransformIterator = beforeTransformData.getIter(bsize);

    myNormalizer.fit(normIterator);

    // Fitted mean: relative deviation from the theoretical mean.
    INDArray meanDelta = Transforms.abs(myNormalizer.getMean().sub(normData.theoreticalMean));
    double worstMeanDeltaPerc =
        meanDelta.mul(100).div(normData.theoreticalMean).max(1).getDouble(0, 0);
    assertTrue(worstMeanDeltaPerc < meanTolerancePerc);

    // Sanity check: the deviation expressed in theoretical standard errors of the mean.
    double worstMeanDeltaInSems = meanDelta.div(normData.theoreticalSEM).max(1).getDouble(0, 0);
    assertTrue(worstMeanDeltaInSems < 2.6); // 99% of the time it should be within this many SEMs

    // Fitted std: relative deviation from the theoretical std.
    INDArray stdDelta = Transforms.abs(myNormalizer.getStd().sub(normData.theoreticalStd));
    double worstStdDeltaPerc =
        stdDelta.div(normData.theoreticalStd).max(1).mul(100).getDouble(0, 0);
    assertTrue(worstStdDeltaPerc < stdTolerancePerc);

    // From here on the iterator hands out normalized batches.
    normIterator.setPreProcessor(myNormalizer);
    while (normIterator.hasNext()) {
      INDArray before = beforeTransformIterator.next().getFeatures();
      INDArray after = normIterator.next().getFeatures();
      INDArray expected = expectedIterator.next().getFeatures();

      // Error of the transform relative to how far the raw value was from the expected one.
      // NOTE(review): the denominator is signed, so negative ratios pass this check
      // trivially — confirm that is intended.
      INDArray errorPerc = Transforms.abs(after.sub(expected)).div(before.sub(expected));
      errorPerc.muli(100);
      double worstErrorPerc = errorPerc.max(0, 1).getDouble(0, 0);
      assertTrue(worstErrorPerc < stdTolerancePerc);
    }
  }