Esempi in Java per Transforms.cosineSim, esempi in Java per org.nd4j.linalg.ops.transforms.Transforms.cosineSim

Esempio n. 1

0

Mostra file

File: BasicModelUtils.java Progetto: CHENShuang1994/deeplearning4j

  /**
   * Finds the words whose vectors are nearest to the given vector.
   *
   * <p>When the lookup table is an {@code InMemoryLookupTable}, its {@code syn0} matrix is
   * normalized once (guarded by the {@code normalized} flag), candidate indices are selected from
   * a single matrix product against the unit-length query, and the surviving candidates are then
   * re-scored with {@code Transforms.cosineSim}. For any other lookup table every word in the
   * vocabulary is scored individually.
   *
   * @param words the query vector
   * @param top the top n words
   * @return the words nearest the query vector
   */
  @Override
  public Collection<String> wordsNearest(INDArray words, int top) {
    if (lookupTable instanceof InMemoryLookupTable) {
      InMemoryLookupTable l = (InMemoryLookupTable) lookupTable;

      INDArray syn0 = l.getSyn0();

      if (!normalized) {
        synchronized (this) {
          if (!normalized) {
            // Use the L2 norm so that rows are scaled the same way as the query vector
            // (Transforms.unitVec); with both sides unit-length the product below is the
            // cosine similarity. Dividing by the L1 norm skewed the candidate ranking.
            syn0.diviColumnVector(syn0.norm2(1));
            normalized = true;
          }
        }
      }

      INDArray similarity = Transforms.unitVec(words).mmul(syn0.transpose());

      // Ask for more candidates than requested; some are dropped by the token filter below.
      List<Double> highToLowSimList = getTopN(similarity, top + 20);

      List<WordSimilarity> result = new ArrayList<>();

      for (Double candidateIndex : highToLowSimList) {
        String word = vocabCache.wordAtIndex(candidateIndex.intValue());
        // Skip missing entries and the special "UNK"/"STOP" tokens.
        if (word != null && !word.equals("UNK") && !word.equals("STOP")) {
          INDArray otherVec = lookupTable.vector(word);
          double sim = Transforms.cosineSim(words, otherVec);

          result.add(new WordSimilarity(word, sim));
        }
      }

      Collections.sort(result, new SimilarityComparator());

      return getLabels(result, top);
    }

    // Fallback for other lookup tables: score every vocabulary word and keep the best ones.
    Counter<String> distances = new Counter<>();

    for (String s : vocabCache.words()) {
      INDArray otherVec = lookupTable.vector(s);
      double sim = Transforms.cosineSim(words, otherVec);
      distances.incrementCount(s, sim);
    }

    distances.keepTopNKeys(top);
    return distances.keySet();
  }

Esempio n. 2

0

Mostra file

File: BasicModelUtils.java Progetto: CHENShuang1994/deeplearning4j

  /**
   * Returns the similarity of 2 words. Result value will be in range [-1,1], where -1.0 is exact
   * opposite similarity, i.e. NO similarity, and 1.0 is total match of two word vectors. However,
   * most of the time you'll see values in range [0,1], but that depends on the training corpus.
   *
   * <p>Returns NaN if either label is null or has no vector in the lookup table.
   *
   * @param label1 the first word
   * @param label2 the second word
   * @return a normalized similarity (cosine similarity), or {@code Double.NaN} when a label is
   *     null or unknown
   */
  @Override
  public double similarity(String label1, String label2) {
    if (label1 == null || label2 == null) {
      log.debug(
          "LABELS: "
              + label1
              + ": "
              + (label1 == null ? "null" : EXISTS)
              + ";"
              + label2
              + " vec2:"
              + (label2 == null ? "null" : EXISTS));
      return Double.NaN;
    }

    // Look the vectors up WITHOUT copying first: calling dup() on a missing (null) vector
    // would throw a NullPointerException before the null check below could run.
    INDArray rawVec1 = lookupTable.vector(label1);
    INDArray rawVec2 = lookupTable.vector(label2);

    if (rawVec1 == null || rawVec2 == null) {
      log.debug(
          label1
              + ": "
              + (rawVec1 == null ? "null" : EXISTS)
              + ";"
              + label2
              + " vec2:"
              + (rawVec2 == null ? "null" : EXISTS));
      return Double.NaN;
    }

    // Identical known labels are a total match; no need to copy or compare the vectors.
    if (label1.equals(label2)) {
      return 1.0;
    }

    // Work on copies so the similarity computation cannot touch the stored vectors.
    INDArray vec1 = rawVec1.dup();
    INDArray vec2 = rawVec2.dup();

    return Transforms.cosineSim(vec1, vec2);
  }

Esempio n. 3

0

Mostra file