/**
 * This method removes low-frequency words based on their frequency change between activations.
 * I.e. if a word has appeared only once, and it has retained the same frequency over consecutive
 * activations, we can assume it can be removed freely.
 */
protected synchronized void activateScavenger() {
    int initialSize = vocabulary.size();
    List<VocabularyWord> words = new ArrayList<>(vocabulary.values());
    for (VocabularyWord word : words) {
        // scavenging is applied only to non-special tokens that are below minWordFrequency
        if (word.isSpecial() || word.getCount() >= minWordFrequency
                || word.getFrequencyShift() == null) {
            word.setFrequencyShift(null);
            continue;
        }

        // save the current word counter into the byte array at the current retention position
        word.getFrequencyShift()[word.getRetentionStep()] = (byte) word.getCount();

        /*
           We're hunting only low-frequency words that have already passed a few activations,
           so we take the word's personal threshold as 20% of minWordFrequency, but not less
           than 2. If, after a few scavenging cycles, the word count is still <= activation,
           we just remove the word; otherwise we nullify word.frequencyShift to avoid further
           checks.
        */
        int activation = Math.max(minWordFrequency / 5, 2);
        logger.debug("Current state> Activation: [" + activation + "], retention info: "
                + Arrays.toString(word.getFrequencyShift()));
        if (word.getCount() <= activation
                && word.getFrequencyShift()[this.retentionDelay - 1] > 0) {
            // if the word count at the latest retention point is the same as at the beginning
            // of the window, the word has stagnated - just remove it
            if (word.getFrequencyShift()[this.retentionDelay - 1] <= activation
                    && word.getFrequencyShift()[this.retentionDelay - 1] == word.getFrequencyShift()[0]) {
                vocabulary.remove(word.getWord());
            }
        }

        // advance the retention position, or shift the retention history to the left once full
        if (word.getRetentionStep() < retentionDelay - 1) {
            word.incrementRetentionStep();
        } else {
            for (int x = 1; x < retentionDelay; x++) {
                word.getFrequencyShift()[x - 1] = word.getFrequencyShift()[x];
            }
        }
    }
    logger.info("Scavenger was activated. Vocab size before: [" + initialSize + "], after: ["
            + vocabulary.size() + "]");
}
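// Illustrative sketch, NOT part of the original class: the culling rule from
// activateScavenger() factored out in isolation, for readability. It assumes the
// retention window has already been filled; wouldBeScavenged is a hypothetical
// name introduced here purely for illustration.
protected boolean wouldBeScavenged(VocabularyWord word) {
    // personal threshold: 20% of minWordFrequency, but not less than 2
    int activation = Math.max(minWordFrequency / 5, 2);
    byte[] shift = word.getFrequencyShift();
    return !word.isSpecial()                          // special tokens are never scavenged
            && shift != null                          // word is still being tracked
            && word.getCount() <= activation          // still below its personal threshold
            && shift[retentionDelay - 1] > 0          // retention window has been filled
            && shift[retentionDelay - 1] <= activation
            && shift[retentionDelay - 1] == shift[0]; // count stagnated across the window
}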
/**
 * Returns the total number of word occurrences in the vocabulary. The sum is computed
 * lazily on the first call and memoized in totalWordOccurencies afterwards.
 */
public long totalWordsBeyondLimit() {
    if (totalWordOccurencies == 0) {
        for (VocabularyWord word : vocabulary.values()) {
            totalWordOccurencies += word.getCount();
        }
    }
    return totalWordOccurencies;
}
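// Hypothetical helper, NOT part of the original class: totalWordsBeyondLimit() memoizes
// its sum in totalWordOccurencies and never invalidates it, so the cached value can go
// stale after truncateVocabulary() or the scavenger removes words. A caller could force
// a recount like this (invalidateWordCountCache is a name introduced here):
protected void invalidateWordCountCache() {
    totalWordOccurencies = 0; // next totalWordsBeyondLimit() call recomputes the sum
}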
/**
 * This method is required for compatibility purposes. It just transfers the vocabulary from
 * VocabHolder into VocabCache.
 *
 * @param cache target cache; only InMemoryLookupCache is supported
 * @param emptyHolder if true, this holder's internal maps are cleared after the transfer
 */
public void transferBackToVocabCache(VocabCache cache, boolean emptyHolder) {
    if (!(cache instanceof InMemoryLookupCache))
        throw new IllegalStateException("Sorry, only InMemoryLookupCache is implemented.");

    // make sure that Huffman codes are updated before the transfer
    List<VocabularyWord> words = words(); // updateHuffmanCodes();

    for (VocabularyWord word : words) {
        if (word.getWord().isEmpty())
            continue;
        VocabWord vocabWord = new VocabWord(1, word.getWord());

        // if we're transferring the full model, it CAN contain a HistoricalGradient for the
        // AdaptiveGradient feature
        if (word.getHistoricalGradient() != null) {
            INDArray gradient = Nd4j.create(word.getHistoricalGradient());
            vocabWord.setHistoricalGradient(gradient);
        }

        // put the VocabWord into both the Tokens and Vocabs maps
        ((InMemoryLookupCache) cache).getVocabs().put(word.getWord(), vocabWord);
        ((InMemoryLookupCache) cache).getTokens().put(word.getWord(), vocabWord);

        // update Huffman tree information
        if (word.getHuffmanNode() != null) {
            vocabWord.setIndex(word.getHuffmanNode().getIdx());
            vocabWord.setCodeLength(word.getHuffmanNode().getLength());
            vocabWord.setPoints(arrayToList(word.getHuffmanNode().getPoint(),
                    word.getHuffmanNode().getLength()));
            vocabWord.setCodes(arrayToList(word.getHuffmanNode().getCode(),
                    word.getHuffmanNode().getLength()));

            // put the word into the index
            cache.addWordToIndex(word.getHuffmanNode().getIdx(), word.getWord());
        }

        // update the vocabWord counter. Subtract 1, since 1 is the base value for any token:
        // this hack is required because the VocabCache impl implies 1 as the base word count, not 0
        if (word.getCount() > 1)
            cache.incrementWordCount(word.getWord(), word.getCount() - 1);
    }

    // at this moment it's pretty safe to nullify all vocabs
    if (emptyHolder) {
        idxMap.clear();
        vocabulary.clear();
    }
}
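// Hypothetical usage sketch, NOT part of the original class. It assumes
// InMemoryLookupCache has a usable no-arg constructor; demoTransfer is a name
// introduced here purely for illustration.
protected VocabCache demoTransfer() {
    VocabCache cache = new InMemoryLookupCache();
    // true -> this holder's idxMap and vocabulary are cleared once the transfer completes
    transferBackToVocabCache(cache, true);
    return cache;
}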
/**
 * All words with frequency below the threshold will be removed.
 *
 * @param threshold exclusive threshold for removal
 */
public void truncateVocabulary(int threshold) {
    logger.debug("Truncating vocabulary to minWordFrequency: [" + threshold + "]");

    // iterate over a snapshot, since we remove entries from the map as we go
    List<VocabularyWord> words = new ArrayList<>(vocabulary.values());
    for (VocabularyWord vw : words) {
        // please note: the threshold is NOT applied to SPECIAL words
        if (!vw.isSpecial() && vw.getCount() < threshold) {
            vocabulary.remove(vw.getWord());
            if (vw.getHuffmanNode() != null)
                idxMap.remove(vw.getHuffmanNode().getIdx());
        }
    }
}
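// Hypothetical usage sketch, NOT part of the original class: the typical order of
// operations at the end of vocabulary construction - truncate first, so only words
// that survived the frequency cutoff are transferred into the cache.
// demoTruncateAndTransfer is a name introduced here purely for illustration.
protected void demoTruncateAndTransfer(VocabCache cache) {
    truncateVocabulary(minWordFrequency);  // drop non-special words below the threshold
    transferBackToVocabCache(cache, true); // move survivors and empty this holder
}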