예제 #1
0
 /**
  * Builds a feature whitelist from the n-best lists exposed by {@code nbest}.
  *
  * <p>Each n-best list is handled as one segment: the distinct feature names found on any of its
  * translations are gathered first, and the counter entry of each such name is then incremented
  * once. A feature's final count is therefore the number of n-best lists it occurs in, no matter
  * how many translations inside a single list carry it.
  *
  * @param nbest container whose {@code nbestLists()} are scanned
  * @param minSegmentCount segment-count cutoff; {@code minSegmentCount - 1} is the threshold
  *     passed to {@code Counters.keysAbove}
  * @return the feature names that {@code Counters.keysAbove} selects for that threshold
  *     (NOTE(review): whether the comparison is strict is decided by {@code keysAbove}, which is
  *     not visible here - confirm)
  */
 public static Set<String> featureWhiteList(FlatNBestList nbest, int minSegmentCount) {
   Counter<String> segmentsPerFeature = new ClassicCounter<String>();

   for (List<ScoredFeaturizedTranslation<IString, String>> segment : nbest.nbestLists()) {
     // Phase 1: collect every distinct feature name used anywhere in this segment.
     Set<String> namesInSegment = new HashSet<String>();
     for (ScoredFeaturizedTranslation<IString, String> candidate : segment) {
       for (FeatureValue<String> fv : candidate.features) {
         namesInSegment.add(fv.name);
       }
     }

     // Phase 2: credit each of those names with exactly one segment.
     for (String name : namesInSegment) {
       segmentsPerFeature.incrementCount(name);
     }
   }

   int threshold = minSegmentCount - 1;
   return Counters.keysAbove(segmentsPerFeature, threshold);
 }
예제 #2
0
 /**
  * Folds the features seen in {@code nbestlists} into an existing per-feature counter and returns
  * the feature names selected from the updated counter.
  *
  * <p>For every n-best list, each distinct feature name occurring on any of its translations
  * increments {@code featureWhitelist} exactly once, at the moment the name is first encountered
  * in that list. The counter passed in is updated in place through {@code incrementCount}.
  *
  * @param featureWhitelist counter of per-feature counts accumulated so far; updated by this call
  * @param nbestlists n-best lists to scan, one inner list per segment
  * @param minSegmentCount segment-count cutoff; {@code minSegmentCount - 1} is the threshold
  *     passed to {@code Counters.keysAbove}
  * @return the feature names that {@code Counters.keysAbove} selects for that threshold
  *     (NOTE(review): whether the comparison is strict is decided by {@code keysAbove}, which is
  *     not visible here - confirm)
  */
 public static Set<String> updatefeatureWhiteList(
     Counter<String> featureWhitelist,
     List<List<RichTranslation<IString, String>>> nbestlists,
     int minSegmentCount) {
   for (List<RichTranslation<IString, String>> segment : nbestlists) {
     // Names already counted for this segment; reset for every n-best list.
     Set<String> alreadyCounted = new HashSet<String>(1000);
     for (RichTranslation<IString, String> candidate : segment) {
       for (FeatureValue<String> fv : candidate.features) {
         // Set.add returns true only on first insertion, so each name is counted once per segment.
         if (alreadyCounted.add(fv.name)) {
           featureWhitelist.incrementCount(fv.name);
         }
       }
     }
   }

   int threshold = minSegmentCount - 1;
   return Counters.keysAbove(featureWhitelist, threshold);
 }