示例#1
0
  /**
   * Builds an n-gram language model on top of the supplied count databases.
   *
   * <p>The maximum order is {@code countDbs.length}. For every count database except the last
   * one, each non-null key whose {@code HISTORY_TYPE_INDEX} count is positive is recorded as a
   * known context: once in {@code allContextsSet} (wrapped in a {@code LongArrWrapper}) and once
   * in {@code allContexts} (converted to an {@code int[]}).
   *
   * @param charIndexer indexer stored on the model
   * @param countDbs count databases, one per n-gram order; its length becomes the maximum order
   * @param activeCharacters set of active character indices; must not be null
   * @param type language-model type stored on the model
   * @param lmPower value stored on the model as {@code lmPower}
   * @throws RuntimeException if {@code activeCharacters} is null
   */
  public NgramLanguageModel(
      Indexer<String> charIndexer,
      CountDbBig[] countDbs,
      Set<Integer> activeCharacters,
      LMType type,
      double lmPower) {
    // Validate up front so an invalid argument fails before the key scan below,
    // rather than after iterating every count database.
    if (activeCharacters == null) {
      throw new RuntimeException("activeCharacters is null!");
    }

    this.charIndexer = charIndexer;
    this.countDbs = countDbs;
    this.maxOrder = countDbs.length;
    this.type = type;
    this.lmPower = lmPower;
    this.activeCharacters = activeCharacters;
    this.allContextsSet = new HashSet<LongArrWrapper>();
    this.allContexts = new ArrayList<int[]>();

    // The last database (highest order) is skipped: only lower orders are registered as contexts.
    for (int order = 0; order < this.maxOrder - 1; order++) {
      CountDbBig db = countDbs[order];
      for (long[] key : db.getKeys()) {
        // getKeys() may yield null entries; only keys seen as a history count as contexts.
        if (key != null && db.getCount(key, CountType.HISTORY_TYPE_INDEX) > 0) {
          allContextsSet.add(new LongArrWrapper(key));
          allContexts.add(LongNgram.convertToIntArr(key));
        }
      }
    }
  }
示例#2
0
 /**
  * Prints the sum of {@code getCharNgramProb(context, c)} over every character index
  * {@code c} in {@code [0, charIndexer.size())} to standard output, labelled with the
  * rendered context. Intended as a manual check on the distribution for one context.
  *
  * @param context the context whose per-character probabilities are summed
  */
 public void checkNormalizes(int[] context) {
   double probabilityMass = 0;
   int charIndex = 0;
   // Accumulate in ascending index order, one character at a time.
   while (charIndex < charIndexer.size()) {
     probabilityMass += getCharNgramProb(context, charIndex);
     charIndex++;
   }
   String message =
       "Total prob for context " + LongNgram.toString(context, charIndexer) + ": " + probabilityMass;
   System.out.println(message);
 }
示例#3
0
 /**
  * Reports whether the given context is known to this model.
  *
  * <p>The empty context is always considered present. Any other context is converted with
  * {@code LongNgram.convertToLong}, wrapped in a {@code LongArrWrapper}, and looked up in
  * {@code allContextsSet}.
  *
  * @param context the context to look up
  * @return {@code true} if {@code context} is empty or is contained in {@code allContextsSet}
  */
 public boolean containsContext(int[] context) {
   // Guard clause: the empty context needs no lookup.
   if (context.length == 0) {
     return true;
   }
   LongArrWrapper lookupKey = new LongArrWrapper(LongNgram.convertToLong(context));
   return allContextsSet.contains(lookupKey);
 }