/**
 * Builds a feature whitelist from the n-best lists held by {@code nbest}.
 *
 * <p>For every n-best list, the distinct feature names found on any of its translations are
 * gathered, and each of those names is counted once for that list. The resulting per-feature
 * counts are then filtered through {@code Counters.keysAbove} with a threshold of
 * {@code minSegmentCount - 1}.
 *
 * <p>NOTE(review): whether that threshold is inclusive or exclusive is decided by
 * {@code Counters.keysAbove}, which is not visible here; confirm before relying on the exact
 * boundary.
 *
 * @param nbest source of the n-best lists to scan
 * @param minSegmentCount segment-count setting; {@code minSegmentCount - 1} is the threshold
 *     handed to {@code Counters.keysAbove}
 * @return the feature names selected by {@code Counters.keysAbove}
 */
public static Set<String> featureWhiteList(FlatNBestList nbest, int minSegmentCount) {
  Counter<String> segmentsPerFeature = new ClassicCounter<String>();

  for (List<ScoredFeaturizedTranslation<IString, String>> segment : nbest.nbestLists()) {
    // First pass: distinct feature names seen anywhere in this n-best list.
    Set<String> namesInSegment = new HashSet<String>();
    for (ScoredFeaturizedTranslation<IString, String> candidate : segment) {
      for (FeatureValue<String> fv : candidate.features) {
        namesInSegment.add(fv.name);
      }
    }

    // Second pass: one count per distinct name, regardless of how often it occurred above.
    for (String name : namesInSegment) {
      segmentsPerFeature.incrementCount(name);
    }
  }

  int threshold = minSegmentCount - 1;
  return Counters.keysAbove(segmentsPerFeature, threshold);
}
/**
 * Folds the features of the given n-best lists into an existing per-feature counter and returns
 * the features that pass the count threshold.
 *
 * <p>{@code featureWhitelist} is modified in place: for each n-best list, every distinct feature
 * name found on its translations increments the counter exactly once, the first time the name
 * is encountered in that list. The updated counter is then filtered through
 * {@code Counters.keysAbove} with a threshold of {@code minSegmentCount - 1}.
 *
 * <p>NOTE(review): whether that threshold is inclusive or exclusive is decided by
 * {@code Counters.keysAbove}, which is not visible here; confirm before relying on the exact
 * boundary.
 *
 * @param featureWhitelist running per-feature counts; updated by this call
 * @param nbestlists n-best lists whose translation features are added to the counts
 * @param minSegmentCount segment-count setting; {@code minSegmentCount - 1} is the threshold
 *     handed to {@code Counters.keysAbove}
 * @return the feature names selected by {@code Counters.keysAbove} from the updated counter
 */
public static Set<String> updatefeatureWhiteList(
    Counter<String> featureWhitelist,
    List<List<RichTranslation<IString, String>>> nbestlists,
    int minSegmentCount) {
  for (List<RichTranslation<IString, String>> segment : nbestlists) {
    // Names already counted for this n-best list; a fresh set is used for every list.
    Set<String> alreadyCounted = new HashSet<String>(1000);
    for (RichTranslation<IString, String> candidate : segment) {
      for (FeatureValue<String> fv : candidate.features) {
        // add() reports whether the name was new to the set, so it doubles as the
        // "first occurrence in this list" check.
        boolean firstOccurrence = alreadyCounted.add(fv.name);
        if (firstOccurrence) {
          featureWhitelist.incrementCount(fv.name);
        }
      }
    }
  }

  int threshold = minSegmentCount - 1;
  return Counters.keysAbove(featureWhitelist, threshold);
}