Java Alphabet.lookupIndex Examples

Programming Language: Java

Namespace/Package Name: java.io

Class/Type: Alphabet

Method/Function: lookupIndex

Examples at hotexamples.com: 4

Java Alphabet.lookupIndex - 4 examples found. These are the top rated real world Java examples of java.io.Alphabet.lookupIndex extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

lookupObject(8)

size(6)

lookupIndex(4)

Example #1

Show file

File: TrainCRF.java Project: carriercomm/PrologMUD

  /** This is (mostly) copied from CRF4.java */
  public boolean[][] labelConnectionsIn(
      Alphabet outputAlphabet, InstanceList trainingSet, String start) {
    int numLabels = outputAlphabet.size();
    boolean[][] connections = new boolean[numLabels][numLabels];
    for (int i = 0; i < trainingSet.size(); i++) {
      Instance instance = trainingSet.getInstance(i);
      FeatureSequence output = (FeatureSequence) instance.getTarget();
      for (int j = 1; j < output.size(); j++) {
        int sourceIndex = outputAlphabet.lookupIndex(output.get(j - 1));
        int destIndex = outputAlphabet.lookupIndex(output.get(j));
        assert (sourceIndex >= 0 && destIndex >= 0);
        connections[sourceIndex][destIndex] = true;
      }
    }

    // Handle start state
    if (start != null) {
      int startIndex = outputAlphabet.lookupIndex(start);
      for (int j = 0; j < outputAlphabet.size(); j++) {
        connections[startIndex][j] = true;
      }
    }

    return connections;
  }

Example #2

Show file

File: FeatureSequence.java Project: zabak/digmap

 public void add(Object key) {
   int fi = dictionary.lookupIndex(key);
   if (fi >= 0)
     // This will happen if the dictionary is frozen,
     // and key is not already in the dictionary.
     add(fi);
   else
     // xxx Should we raise an exception if the appending doesn't happen?  "yes" -akm, added 1/2008
     throw new IllegalStateException(
         "Object cannot be added to FeatureSequence because its Alphabet is frozen.");
 }

Example #3

Show file

File: FeatureSequence.java Project: zabak/digmap

  /**
   * Remove features from the sequence that occur fewer than <code>cutoff</code> times in the
   * corpus, as indicated by the provided counts. Also swap in the new, reduced alphabet. This
   * method alters the instance in place; it is not appropriate if the original instance will be
   * needed.
   */
  public void prune(double[] counts, Alphabet newAlphabet, int cutoff) {
    // The goal is to replace the sequence of features in place, by
    //  creating a new array and then swapping it in.

    // First: figure out how long the new array will have to be

    int newLength = 0;
    for (int i = 0; i < length; i++) {
      if (counts[features[i]] >= cutoff) {
        newLength++;
      }
    }

    // Second: allocate a new features array

    int[] newFeatures = new int[newLength];

    // Third: fill the new array

    int newIndex = 0;
    for (int i = 0; i < length; i++) {
      if (counts[features[i]] >= cutoff) {

        Object feature = dictionary.lookupObject(features[i]);
        newFeatures[newIndex] = newAlphabet.lookupIndex(feature);

        newIndex++;
      }
    }

    // Fourth: swap out the arrays

    features = newFeatures;
    length = newLength;
    dictionary = newAlphabet;
  }

Example #4

Show file

File: TopicModelDiagnostics.java Project: luolanfeixue/Mallet

  public void collectDocumentStatistics() {

    topicCodocumentMatrices = new int[numTopics][numTopWords][numTopWords];
    wordTypeCounts = new int[alphabet.size()];
    numTokens = 0;

    // This is an array of hash sets containing the words-of-interest for each topic,
    //  used for checking if the word at some position is one of those words.
    IntHashSet[] topicTopWordIndices = new IntHashSet[numTopics];

    // The same as the topic top words, but with int indices instead of strings,
    //  used for iterating over positions.
    int[][] topicWordIndicesInOrder = new int[numTopics][numTopWords];

    // This is an array of hash sets that will hold the words-of-interest present in a document,
    //  which will be cleared after every document.
    IntHashSet[] docTopicWordIndices = new IntHashSet[numTopics];

    int numDocs = model.getData().size();

    // The count of each topic, again cleared after every document.
    int[] topicCounts = new int[numTopics];

    for (int topic = 0; topic < numTopics; topic++) {
      IntHashSet wordIndices = new IntHashSet();

      for (int i = 0; i < numTopWords; i++) {
        if (topicTopWords[topic][i] != null) {
          int type = alphabet.lookupIndex(topicTopWords[topic][i]);
          topicWordIndicesInOrder[topic][i] = type;
          wordIndices.add(type);
        }
      }

      topicTopWordIndices[topic] = wordIndices;
      docTopicWordIndices[topic] = new IntHashSet();
    }

    int doc = 0;

    for (TopicAssignment document : model.getData()) {

      FeatureSequence tokens = (FeatureSequence) document.instance.getData();
      FeatureSequence topics = (FeatureSequence) document.topicSequence;

      for (int position = 0; position < tokens.size(); position++) {
        int type = tokens.getIndexAtPosition(position);
        int topic = topics.getIndexAtPosition(position);

        numTokens++;
        wordTypeCounts[type]++;

        topicCounts[topic]++;

        if (topicTopWordIndices[topic].contains(type)) {
          docTopicWordIndices[topic].add(type);
        }
      }

      int docLength = tokens.size();

      if (docLength > 0) {
        int maxTopic = -1;
        int maxCount = -1;

        for (int topic = 0; topic < numTopics; topic++) {

          if (topicCounts[topic] > 0) {
            numNonZeroDocuments[topic]++;

            if (topicCounts[topic] > maxCount) {
              maxTopic = topic;
              maxCount = topicCounts[topic];
            }

            sumCountTimesLogCount[topic] += topicCounts[topic] * Math.log(topicCounts[topic]);

            double proportion =
                (model.alpha[topic] + topicCounts[topic]) / (model.alphaSum + docLength);
            for (int i = 0; i < DEFAULT_DOC_PROPORTIONS.length; i++) {
              if (proportion < DEFAULT_DOC_PROPORTIONS[i]) {
                break;
              }
              numDocumentsAtProportions[topic][i]++;
            }

            IntHashSet supportedWords = docTopicWordIndices[topic];
            int[] indices = topicWordIndicesInOrder[topic];

            for (int i = 0; i < numTopWords; i++) {
              if (supportedWords.contains(indices[i])) {
                for (int j = i; j < numTopWords; j++) {
                  if (i == j) {
                    // Diagonals are total number of documents with word W in topic T
                    topicCodocumentMatrices[topic][i][i]++;
                  } else if (supportedWords.contains(indices[j])) {
                    topicCodocumentMatrices[topic][i][j]++;
                    topicCodocumentMatrices[topic][j][i]++;
                  }
                }
              }
            }

            docTopicWordIndices[topic].clear();
            topicCounts[topic] = 0;
          }
        }

        if (maxTopic > -1) {
          numRank1Documents[maxTopic]++;
        }
      }

      doc++;
    }
  }