@Override public void annotate(Blackboard blackboard, DocumentComponent component) { // get the grams and order them by score Collection<Gram> grams = blackboard.getKeyphrases(); Map<Keyphrase, Double> scoredGrams = new HashMap<>(); for (Gram g : grams) { Keyphrase k = (Keyphrase) g; scoredGrams.put(k, k.getFeature(GenericEvaluatorAnnotator.SCORE)); } List<Map.Entry<Keyphrase, Double>> gramsToTrash = scoredGrams .entrySet() .stream() .sorted(Collections.reverseOrder(Map.Entry.comparingByValue())) .collect(Collectors.toList()); // now the have the gram ordered by score. // we want to keep at least the first 5%; then, we look for the maximum // steep in score in the following 20%, and we discard everything after // that remains. double bestRange = Math.ceil((gramsToTrash.size() * 5.0) / 100.0); double steepRange = Math.ceil((gramsToTrash.size() * 25.0) / 100.0); // keep the first 5% for (int i = 0; i < bestRange; i++) { gramsToTrash.remove(0); } double maxSteep = Double.MIN_VALUE; int maxSteepIndex = Integer.MIN_VALUE; // search for the maximum steep in the next 15% for (int i = 0; i < steepRange - 1; i++) { double steep = (gramsToTrash.get(i).getValue() - gramsToTrash.get(i + 1).getValue()); if (steep > maxSteep) { maxSteep = steep; maxSteepIndex = i; } } // keep the grams before the steep for (int i = 0; i < maxSteepIndex; i++) { gramsToTrash.remove(0); } // now remove the remaining grams from the blackboard. for (Map.Entry<Keyphrase, Double> e : gramsToTrash) blackboard.removeKeyphrase(e.getKey()); }
/**
 * Runs this annotator on the given blackboard by delegating to
 * {@code annotate}, passing the blackboard itself together with the
 * structure it returns from {@code getStructure()}.
 *
 * @param b the blackboard to annotate
 */
@Override
default void run(Blackboard b) {
    annotate(b, b.getStructure());
}