Пример #1
0
    /**
     * Records one N-Gram statistics entry as numeric feature vectors.
     *
     * <p>A 4-element vector {@code [rank, tagEstimator, maxTermFreqEstimator, isNNP]} is appended
     * to {@code _nGramDatasetsList} when the label splits into more than one piece on a single
     * space, otherwise to {@code _oneGramDatasetsList}. A 5-element vector holding the same four
     * values plus a flag ({@code 1.0} for the multi-piece case, {@code 0.0} otherwise) is always
     * appended to {@code _allDatasetsList}.
     *
     * @param nGramsFreqStats statistics of the N-Gram to record; its label must not be
     *     {@code null}
     */
    public void add(CNGramsStats nGramsFreqStats) {
      String label = nGramsFreqStats.getLabel();
      float tagEstimator = CNGramsModel.getInstance().getTagEstimator();
      float maxTermFreqEstimator = CNGramsModel.getInstance().getMaxTermFreqEstimator();

      // Split the label only once; the result drives both the flag and the target list.
      final boolean isMultiTerm = label.split(" ").length > 1;

      // Features shared by the per-kind dataset and the combined dataset.
      final double[] features = new double[4];
      features[0] = nGramsFreqStats.getNGramRank();
      features[1] = tagEstimator;
      features[2] = maxTermFreqEstimator;
      features[3] = nGramsFreqStats.isNNP() ? 1.0 : 0.0;

      // The combined dataset is the shared features followed by the multi-term flag.
      final double[] allDataset = new double[features.length + 1];
      System.arraycopy(features, 0, allDataset, 0, features.length);
      allDataset[features.length] = isMultiTerm ? 1.0 : 0.0;

      if (isMultiTerm) {
        _nGramDatasetsList.add(features);
      } else {
        _oneGramDatasetsList.add(features);
      }
      _allDatasetsList.add(allDataset);
    }
Пример #2
0
    /**
     * Appends the rank and NNP flag of the given N-Gram to the validation lists.
     *
     * <p>Entries whose label splits into more than one piece on a single space go to the N-Gram
     * lists; all others go to the 1-Gram lists.
     *
     * @param nGramsFreqStats statistics of the N-Gram to record
     */
    public void add(CNGramsStats nGramsFreqStats) {
      final boolean hasSeveralTerms = nGramsFreqStats.getLabel().split(" ").length > 1;
      final Integer rank = Integer.valueOf(nGramsFreqStats.getNGramRank());
      final Boolean nnp = Boolean.valueOf(nGramsFreqStats.isNNP());

      if (!hasSeveralTerms) {
        _validation1GramRankList.add(rank);
        _validation1GramNNPList.add(nnp);
        return;
      }
      _validationNGramRankList.add(rank);
      _validationNGramNNPList.add(nnp);
    }
Пример #3
0
    /**
     * Computes the key parameters of the NGramsOrder model and writes them to a training file.
     *
     * <p>Occurrence statistics are accumulated per rank, separately for compound and
     * non-compound entries, then rendered as text and written to the file named
     * {@code TRAINING_FILES + "_" + _fileIndex}. An {@link IOException} raised while writing is
     * logged and not propagated.
     */
    private void compute() {
      // One accumulator per rank, for both 1-Grams and N-Grams.
      final NLabeledNGramsStats[] compoundStats = new NLabeledNGramsStats[MAX_ORDER_NGRAM_STATS];
      final NLabeled1GramsStats[] singleStats = new NLabeled1GramsStats[MAX_ORDER_NGRAM_STATS];
      for (int rank = 0; rank < MAX_ORDER_NGRAM_STATS; rank++) {
        compoundStats[rank] = new NLabeledNGramsStats(rank);
        singleStats[rank] = new NLabeled1GramsStats(rank);
      }

      int singleCount = 0;
      int compoundCount = 0;
      for (CNGramsStats stat : _nGramsFrequencyStatsList) {
        final int rank = stat.getNGramRank();

        // Only lower-order labels are collected.
        if (rank >= MAX_ORDER_NGRAM_STATS) {
          continue;
        }

        if (stat.isCompound()) {
          compoundStats[rank].add(
              stat.getNumNGramOccurrences(), stat.isNNP(), stat.getMaxNumTermOccurrences());
          compoundCount++;
        } else {
          singleStats[rank].add(stat.getNumNGramOccurrences(), stat.isNNP(), -1);
          singleCount++;
        }
      }

      final CNGramsModel model = CNGramsModel.getInstance();

      // Header line carrying the two estimators currently set on the model.
      final StringBuilder out = new StringBuilder();
      out.append(CFileUtil.COMMENTS_FIRST_CHAR)
          .append(" nnpFudge:")
          .append(model.getTagEstimator())
          .append(" compoundFudge:")
          .append(model.getMaxTermFreqEstimator())
          .append("\n");

      // 1-Gram section: only entries whose compute() reports true are emitted.
      out.append(CFileUtil.COMMENTS_FIRST_CHAR).append("1-Gram frequency model parameters\n");
      for (NLabeled1GramsStats single : singleStats) {
        if (single.compute(singleCount)) {
          out.append(single.toString()).append("\n");
        }
      }
      out.append(CEnv.ENTRIES_DELIM).append("\n");

      // N-Gram section, same layout as the 1-Gram section.
      out.append(CFileUtil.COMMENTS_FIRST_CHAR).append("N-Gram frequency model parameters\n");
      for (NLabeledNGramsStats compound : compoundStats) {
        if (compound.compute(compoundCount)) {
          out.append(compound.toString()).append("\n");
        }
      }
      out.append(CEnv.ENTRIES_DELIM);

      try {
        CFileUtil.write(TRAINING_FILES + "_" + _fileIndex, out.toString());
      } catch (IOException e) {
        CLogger.error(e.toString());
      }
    }