コード例 #1
0
ファイル: FeatureGen.java プロジェクト: NguyenAnhDuc/fpt-qa
  /**
   * Rebuilds the feature list from the training data and the dictionary.
   *
   * <p>The method first resets {@code features}, {@code fmap} and
   * {@code currentFeatures} (clearing them, or creating them when they are
   * still {@code null}). It then scans every context predicate of every
   * training observation and considers the pair
   * (observation label, context predicate) as a feature only when:
   * <ul>
   *   <li>the context predicate has a dictionary entry whose count is greater
   *       than {@code option.cpRareThreshold}, and</li>
   *   <li>that entry has a per-label record for the observation's label whose
   *       count is greater than {@code option.fRareThreshold}.</li>
   * </ul>
   * For each newly added feature, the feature index is written back into the
   * dictionary record and the dictionary entry is flagged as chosen. Finally
   * {@code option.numFeatures} is set to the size of the feature list.
   *
   * <p>If the data set, the dictionary, or their underlying containers are
   * missing, a message is printed to standard output and the method returns
   * after the reset step. NOTE(review): {@code option.numFeatures} is left
   * untouched on that path even though {@code features} has been cleared;
   * confirm that callers do not rely on it there.
   */
  public void generateFeatures() {
    // Reset the working collections, reusing existing instances when present.
    if (features != null) {
      features.clear();
    } else {
      features = new ArrayList();
    }

    if (fmap != null) {
      fmap.clear();
    } else {
      fmap = new HashMap();
    }

    if (currentFeatures != null) {
      currentFeatures.clear();
    } else {
      currentFeatures = new ArrayList();
    }

    // Also guard the holder objects themselves so that a missing data set or
    // dictionary yields the message below instead of a NullPointerException.
    if (data == null || dict == null || data.trnData == null || dict.dict == null) {
      System.out.println("No data or dictionary for generating features");
      return;
    }

    // scan over data list
    for (int i = 0; i < data.trnData.size(); i++) {
      Observation obsr = (Observation) data.trnData.get(i);

      for (int j = 0; j < obsr.cps.length; j++) {
        // Integer.valueOf replaces the deprecated Integer constructor; the
        // resulting key is equal, so the map lookup is unchanged.
        Element elem = (Element) dict.dict.get(Integer.valueOf(obsr.cps[j]));
        if (elem == null) {
          // context predicate not found in the dictionary, then skip
          continue;
        }
        if (elem.count <= option.cpRareThreshold) {
          // skip this context predicate, it is too rare
          continue;
        }

        CountFIdx cntFIdx =
            (CountFIdx) elem.lbCntFidxes.get(Integer.valueOf(obsr.humanLabel));
        if (cntFIdx == null) {
          // no record for this label in the dictionary entry, then skip
          continue;
        }
        if (cntFIdx.count <= option.fRareThreshold) {
          // skip this feature, it is too rare
          continue;
        }

        // update the feature
        Feature f = new Feature(obsr.humanLabel, obsr.cps[j]);
        // Presumably resolves the feature's index from fmap, leaving f.idx
        // negative when the feature is not registered yet - TODO confirm
        // against Feature.strId2Idx.
        f.strId2Idx(fmap);
        if (f.idx < 0) {
          // new feature, add to the feature list
          // (addFeature is expected to assign f.idx, which is read below)
          addFeature(f);

          // update the feature index in the dictionary
          cntFIdx.fidx = f.idx;
          elem.chosen = 1;
        }
      }
    }

    option.numFeatures = features.size();
  }