Example #1
  public String processOneSent(List<String> nbest, int sentID) {
    System.err.println("Now process sentence " + sentID);

    // step-0: preprocess
    // assumption: each hyp has the format:
    // "sent_id ||| hyp_itself ||| feature scores ||| linear-combination-of-feature-scores
    // (this should be logP)"
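    // For example, a single (hypothetical, made-up) nbest line might look like:
    // "12 ||| the cat sat on the mat ||| -1.2 0.5 -3.4 ||| -4.1"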

    List<String> hypsItself = new ArrayList<String>();
    // ArrayList<String> l_feat_scores = new ArrayList<String>();
    // linear combination of all baseline features
    List<Double> baselineScores = new ArrayList<Double>();
    List<HashMap<String, Integer>> ngramTbls = new ArrayList<HashMap<String, Integer>>();
    List<Integer> sentLens = new ArrayList<Integer>();

    for (String hyp : nbest) {
      String[] fds = Regex.threeBarsWithSpace.split(hyp);
      int tSentID = Integer.parseInt(fds[0]);
      if (sentID != tSentID) {
        throw new RuntimeException("sentence ID " + tSentID + " does not match expected " + sentID);
      }
      String hypothesis = (fds.length == 4) ? fds[1] : "";
      hypsItself.add(hypothesis);

      String[] words = Regex.spaces.split(hypothesis);
      sentLens.add(words.length);

      HashMap<String, Integer> ngramTbl = new HashMap<String, Integer>();
      Ngram.getNgrams(ngramTbl, 1, bleuOrder, words);
      ngramTbls.add(ngramTbl);

      // l_feat_scores.add(fds[2]);

      // The value of finalIndex is expected to be 3,
      // unless the hyp_itself is empty,
      // in which case finalIndex will be 2.
      int finalIndex = fds.length - 1;
      baselineScores.add(Double.parseDouble(fds[finalIndex]));
    }

    // step-1: get normalized distribution

    // values in baselineScores are replaced by normalized probabilities
    computeNormalizedProbs(baselineScores, scalingFactor);
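    // (presumably a scaled softmax over the nbest, i.e. p_i proportional to
    // exp(scalingFactor * score_i); the exact formula lives in computeNormalizedProbs)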

    List<Double> normalizedProbs = baselineScores;

    // === required by google linear corpus gain
    HashMap<String, Double> posteriorCountsTbl = null;
    if (useGoogleLinearCorpusGain) {
      posteriorCountsTbl = new HashMap<String, Double>();
      getGooglePosteriorCounts(ngramTbls, normalizedProbs, posteriorCountsTbl);
    }

    // step-2: rerank the nbest
    /**
     * TODO: zhifei: the re-ranking currently takes O(n^2), where n is the size of the nbest. We
     * could speed this up significantly (leading to O(n)) by first estimating a model on the
     * nbest, and then reranking the nbest with the estimated model.
     */
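    // Sketch of the decision rule being applied below (MBR): pick
    //   hyp* = argmax_y  sum_{y'} p(y') * Gain(y, y')
    // where p(y') are the normalized posteriors from step-1 and Gain is a sentence-level,
    // BLEU-like gain (computeExpectedGain, or the Google linear corpus gain variant).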
    double bestGain = Double.NEGATIVE_INFINITY; // start from the worst possible gain
    String bestHyp = null;
    List<Double> gains = new ArrayList<Double>();
    for (int i = 0; i < hypsItself.size(); i++) {
      String curHyp = hypsItself.get(i);
      int curHypLen = sentLens.get(i);
      HashMap<String, Integer> curHypNgramTbl = ngramTbls.get(i);
      // double cur_gain = computeGain(cur_hyp, l_hyp_itself, l_normalized_probs);
      double curGain = 0;
      if (useGoogleLinearCorpusGain) {
        curGain = computeExpectedLinearCorpusGain(curHypLen, curHypNgramTbl, posteriorCountsTbl);
      } else {
        curGain =
            computeExpectedGain(curHypLen, curHypNgramTbl, ngramTbls, sentLens, normalizedProbs);
      }

      gains.add(curGain);
      if (i == 0 || curGain > bestGain) { // maximize
        bestGain = curGain;
        bestHyp = curHyp;
      }
    }

    // step-3: output the 1best or nbest
    if (this.produceRerankedNbest) {
      // TODO: sort the list and write the reranked nbest; use Collections.sort(List list,
      // Comparator c). See the sketch after this method.
    } else {
      /*
       * this.out.write(bestHyp); this.out.write("\n"); out.flush();
       */
    }

    System.err.println("best gain: " + bestGain);
    if (null == bestHyp) {
      throw new RuntimeException("mbr reranked one best is null, must be wrong");
    }
    return bestHyp;
  }
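
One way to complete the step-3 TODO is to sort the hypothesis indices by gain (descending) with Collections.sort and a Comparator, as the comment suggests, and then write the hypotheses out in that order. A minimal, self-contained sketch under that assumption (the helper name and its standalone class are made up for illustration):

  import java.util.ArrayList;
  import java.util.Collections;
  import java.util.Comparator;
  import java.util.List;

  public class RerankSketch {
    /** Returns the hypotheses reordered by descending gain. */
    public static List<String> rerankByGain(final List<String> hyps, final List<Double> gains) {
      List<Integer> order = new ArrayList<Integer>();
      for (int i = 0; i < hyps.size(); i++) {
        order.add(i);
      }
      // sort indices so that the highest-gain hypothesis comes first
      Collections.sort(order, new Comparator<Integer>() {
        public int compare(Integer a, Integer b) {
          return Double.compare(gains.get(b), gains.get(a));
        }
      });
      List<String> reranked = new ArrayList<String>();
      for (int idx : order) {
        reranked.add(hyps.get(idx));
      }
      return reranked;
    }
  }

Inside processOneSent this could be called as rerankByGain(hypsItself, gains) in the produceRerankedNbest branch, with each reranked hypothesis written back out in the same "sent_id ||| hyp ||| ..." style if the full lines need to be preserved.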