Пример #1
0
    /**
     * Build the frequency statistics of the NGramsOrder model and write them to a file.
     *
     * <p>Every entry of {@code _nGramsFrequencyStatsList} whose rank is below
     * {@code MAX_ORDER_NGRAM_STATS} is added to the per-rank bucket of its category: compound
     * N-Grams or 1-Grams. Each bucket is then asked to {@code compute} against the number of
     * entries collected for its category, and the buckets for which that call returns
     * {@code true} are appended, one per line, to a report that is written to the file named
     * {@code TRAINING_FILES + "_" + _fileIndex}.
     *
     * <p>An {@link IOException} raised while writing is logged through {@code CLogger} and is
     * not rethrown.
     */
    private void compute() {
      // One bucket per rank and per category, created in the same pass.
      final NLabeledNGramsStats[] compoundBuckets = new NLabeledNGramsStats[MAX_ORDER_NGRAM_STATS];
      final NLabeled1GramsStats[] singleBuckets = new NLabeled1GramsStats[MAX_ORDER_NGRAM_STATS];
      for (int rank = 0; rank < MAX_ORDER_NGRAM_STATS; rank++) {
        compoundBuckets[rank] = new NLabeledNGramsStats(rank);
        singleBuckets[rank] = new NLabeled1GramsStats(rank);
      }

      // Number of entries collected in each category.
      int singleTotal = 0;
      int compoundTotal = 0;

      for (CNGramsStats entry : _nGramsFrequencyStatsList) {
        final int rank = entry.getNGramRank();

        // Only lower-order NGrams are collected: there is no bucket at or above the limit.
        if (rank >= MAX_ORDER_NGRAM_STATS) {
          continue;
        }

        if (!entry.isCompound()) {
          // 1-Grams pass -1 in place of a max-term-occurrences value.
          singleBuckets[rank].add(entry.getNumNGramOccurrences(), entry.isNNP(), -1);
          singleTotal++;
          continue;
        }

        compoundBuckets[rank].add(
            entry.getNumNGramOccurrences(), entry.isNNP(), entry.getMaxNumTermOccurrences());
        compoundTotal++;
      }

      final CNGramsModel model = CNGramsModel.getInstance();

      // Header comment line recording the two estimators currently held by the model.
      final StringBuilder report = new StringBuilder();
      report
          .append(CFileUtil.COMMENTS_FIRST_CHAR)
          .append(" nnpFudge:")
          .append(model.getTagEstimator())
          .append(" compoundFudge:")
          .append(model.getMaxTermFreqEstimator())
          .append("\n");

      // 1-Gram section. NOTE(review): compute(total) presumably normalizes the bucket into
      // averages, as the previous comment here suggested; confirm against the stats classes.
      report.append(CFileUtil.COMMENTS_FIRST_CHAR).append("1-Gram frequency model parameters\n");
      for (NLabeled1GramsStats bucket : singleBuckets) {
        if (bucket.compute(singleTotal)) {
          report.append(bucket.toString()).append("\n");
        }
      }
      report.append(CEnv.ENTRIES_DELIM).append("\n");

      // N-Gram section, same layout; the closing delimiter is not followed by a newline.
      report.append(CFileUtil.COMMENTS_FIRST_CHAR).append("N-Gram frequency model parameters\n");
      for (NLabeledNGramsStats bucket : compoundBuckets) {
        if (bucket.compute(compoundTotal)) {
          report.append(bucket.toString()).append("\n");
        }
      }
      report.append(CEnv.ENTRIES_DELIM);

      final String fileName = TRAINING_FILES + "_" + _fileIndex;
      try {
        CFileUtil.write(fileName, report.toString());
      } catch (IOException e) {
        // A failed write is only logged; the method still returns normally.
        CLogger.error(e.toString());
      }
    }