/** * Inserts the classifier in the population. Before that, it looks if there is a classifier in the * population with the same action and condition (in this case, increments its numerosity). After, * it checks that the number of micro classifiers is less than the maximum population size. If it * isn't, it deletes one classifier from the population calling the deleteClassifier function. It * inserts the classifier in the population and in the action set if it's not null. * * @param cl is the classifier that has to be inserted in the population. * @param ASet Population where the classifier will be inserted. */ public void insertInPopulation(Classifier cl, Population ASet) { boolean found = false; int i = 0; while (i < macroClSum && !found) { if (set[i].equals(cl)) { set[i].increaseNumerosity(cl.getNumerosity()); microClSum += cl.getNumerosity(); if (ASet != null) { if (ASet.isThereClassifier(set[i]) >= 0) ASet.microClSum += cl.getNumerosity(); } found = true; } i++; } if (!found) { addClassifier(cl); } // Here, the classifier has been added to the population if (microClSum > Config.popSize) { // If we have inserted to many classifiers, we have to delete one. deleteClFromPopulation(ASet); } } // end insertInPopulation
/** * Inserts the classifier into the population. Before, it looks if there is a classifier in the * population that can subsume the new one (in this case, increments its numerosity). After, it * checks that the number of micro classifiers is less than the maximum population size. If it * isn't, it deletes one classifier of the population calling the deleteClassifier function. It * inserts the classifier in the population and in the action set if it's not null. * * @param cl is the classifier that has to be inserted in the population. * @param ASet Population where the classifier will be inserted. */ public void insertInPSubsumingCl(Classifier cl, Population ASet) { int i = 0; Classifier bestSubsumer = null; Classifier equalClassifier = null; // We look for the best subsumer or for an equal classifier. while (i < macroClSum) { if (set[i].couldSubsume() && set[i].isMoreGeneral(cl)) { if (bestSubsumer == null) bestSubsumer = set[i]; else if (set[i].isMoreGeneral(bestSubsumer)) bestSubsumer = set[i]; } if (set[i].equals(cl)) equalClassifier = set[i]; i++; } // If there is a subsumer, its numerosity is increased. if (bestSubsumer != null) { bestSubsumer.increaseNumerosity(cl.getNumerosity()); microClSum += cl.getNumerosity(); } else if (equalClassifier != null) { equalClassifier.increaseNumerosity(cl.getNumerosity()); microClSum += cl.getNumerosity(); } else { addClassifier(cl); } // There's no classifier deletion, independent of if the maximum size // has been overcomen } // end insertInPSubsumingCl
/** * Deletes one classifier from the population. After that, if the population passed as a parameter * is not null, it looks for the deleted classifier. If it is in the second population, it will * delete it too. * * @param aSet is the population where the deleted classifier has to be searched. * @return a Classifier that contains the deleted classifier. */ public Classifier deleteClFromPopulation(Population aSet) { // A classifier has been deleted from the population Classifier clDeleted = deleteClassifier(); if (aSet != null) { // Now, this classifier has to be deleted from the action set (if it exists in). int pos = aSet.isThereClassifier(clDeleted); // It is searched in the action set. if (pos >= 0) { // It has to be deleted from the action set too. aSet.microClSum--; // If the classifier has 0 numerosity, we remove it from the population. if (clDeleted.getNumerosity() == 0) { // It has to be completely deleted from action set. aSet.macroClSum--; // Decrements the number of macroclassifiers aSet.set[pos] = aSet.set[aSet.macroClSum]; // Moves the last classifier to the deleted one aSet.set[aSet.macroClSum] = null; // Puts the last classifier to null. } } } return clDeleted; } // end deleteClFromPopulation
/** This method applies the action set subsumption */ public void doActionSetSubsumption() { int i, pos = 0; Classifier cl = null; for (i = 0; i < macroClSum; i++) { if (set[i].couldSubsume()) { if (cl == null || set[i].numberOfDontCareSymbols() > cl.numberOfDontCareSymbols() || (set[i].numberOfDontCareSymbols() == cl.numberOfDontCareSymbols() && Config.rand() < 0.5)) { cl = set[i]; pos = i; } } } if (cl != null) { for (i = 0; i < macroClSum; i++) { if (cl != set[i] && cl.isMoreGeneral(set[i])) { cl.increaseNumerosity(set[i].getNumerosity()); // Now, the classifier has to be removed from the actionSet and the population. // It's deleted from the action set. Classifier clDeleted = set[i]; deleteClassifier(i); // And now, it's deleted from the population Population p = parentRef; while (p.parentRef != null) { p = p.parentRef; } pos = p.isThereClassifier( clDeleted); // The classifier is searched in the initial population. if (pos >= 0) p.deleteClassifier(pos); } } } } // end doActionSetSubsumption
/** * Returns if the classifier of the class subsumes the classifier passed as a parameter. * * @param cl is the subsumed classifier. * @return a boolean indicating if it subsumes */ public boolean doesSubsume(Classifier cl) { int i; // First, check if the condition is the same if (action != cl.getAction()) return false; // Then, check that is more general if (parameters.couldSubsume()) { for (i = 0; i < rep.length; i++) { if (!rep[i].isMoreGeneral(cl.rep[i])) return false; } return true; } return false; } // end doesSubsume
/** * Adds a classifier in the population. * * @param cl is the new classifier to be added. */ public void addClassifier(Classifier cl) { try { set[macroClSum] = cl; microClSum += cl.getNumerosity(); macroClSum++; } catch (Exception e) { System.out.println( "Exception in the insertion of a new classifier. The macroClSum is : " + macroClSum + " and the microClsum: " + microClSum); System.out.println( "And the maximum number of classifiers in the population is: " + Config.popSize); e.printStackTrace(); } } // end addClassifier
/** * Returns if the classifier of the class is equal to the classifier given as a parameter. * * @param cl is a classifier. * @return a boolean indicating if they are equals. */ public boolean equals(Classifier cl) { int i; try { // Checking the action if (action != cl.getAction()) return false; // Checking the condition for (i = 0; i < rep.length; i++) { if (!rep[i].equals(cl.rep[i])) return false; } return true; } catch (Exception e) { return false; } } // end equals
/** * Applies crossover. It generates two children. * * @param parent1 is the first parent. * @param parent2 is the second parent * @param child1 is the first child * @param child2 is the second child. */ public void makeCrossover( Classifier parent1, Classifier parent2, Classifier child1, Classifier child2) { int i = 0; // cross1 is a number between [0.. clLength-1] int cross1 = (int) (Config.rand() * (double) Config.clLength); // cross2 is a number between [1..clLenght]. int cross2 = (int) (Config.rand() * (double) Config.clLength) + 1; if (cross1 > cross2) { int aux = cross2; cross2 = cross1; cross1 = aux; // In the else-if condition is not necessary to check if (cross2<clLength) // to increment the point, because cross1 [0..length-1] } else if (cross1 == cross2) cross2++; // All the intervals (real representation) or genes (ternary representation) that // are not in the cross point are crossed. if (!Config.ternaryRep) { for (i = cross1 + 1; i < cross2 - 1; i++) { child2.setAllele(i, parent1); child1.setAllele(i, parent2); } // Now we have to cross the border allele child1.crossAllele(cross1, parent1, parent2); child1.crossAllele(cross2 - 1, parent2, parent1); child2.crossAllele(cross1, parent2, parent1); child2.crossAllele(cross2 - 1, parent1, parent2); } else { for (i = cross1; i < cross2 - 1; i++) { child2.setAllele(i, parent1); child1.setAllele(i, parent2); } } } // end makeCrossover
/** * The core implementation of the search. * * @param root The root word to search from. Traditionally, this is the root of the sentence. * @param candidateFragments The callback for the resulting sentence fragments. This is a * predicate of a triple of values. The return value of the predicate determines whether we * should continue searching. The triple is a triple of * <ol> * <li>The log probability of the sentence fragment, according to the featurizer and the * weights * <li>The features along the path to this fragment. The last element of this is the * features from the most recent step. * <li>The sentence fragment. Because it is relatively expensive to compute the resulting * tree, this is returned as a lazy {@link Supplier}. * </ol> * * @param classifier The classifier for whether an arc should be on the path to a clause split, a * clause split itself, or neither. * @param featurizer The featurizer to use. Make sure this matches the weights! * @param actionSpace The action space we are allowed to take. Each action defines a means of * splitting a clause on a dependency boundary. */ protected void search( // The root to search from IndexedWord root, // The output specs final Predicate<Triple<Double, List<Counter<String>>, Supplier<SentenceFragment>>> candidateFragments, // The learning specs final Classifier<ClauseSplitter.ClauseClassifierLabel, String> classifier, Map<String, ? extends List<String>> hardCodedSplits, final Function<Triple<State, Action, State>, Counter<String>> featurizer, final Collection<Action> actionSpace, final int maxTicks) { // (the fringe) PriorityQueue<Pair<State, List<Counter<String>>>> fringe = new FixedPrioritiesPriorityQueue<>(); // (avoid duplicate work) Set<IndexedWord> seenWords = new HashSet<>(); State firstState = new State(null, null, -9000, null, x -> {}, true); // First state is implicitly "done" fringe.add(Pair.makePair(firstState, new ArrayList<>(0)), -0.0); int ticks = 0; while (!fringe.isEmpty()) { if (++ticks > maxTicks) { // System.err.println("WARNING! Timed out on search with " + ticks + " ticks"); return; } // Useful variables double logProbSoFar = fringe.getPriority(); assert logProbSoFar <= 0.0; Pair<State, List<Counter<String>>> lastStatePair = fringe.removeFirst(); State lastState = lastStatePair.first; List<Counter<String>> featuresSoFar = lastStatePair.second; IndexedWord rootWord = lastState.edge == null ? root : lastState.edge.getDependent(); // Register thunk if (lastState.isDone) { if (!candidateFragments.test( Triple.makeTriple( logProbSoFar, featuresSoFar, () -> { SemanticGraph copy = new SemanticGraph(tree); lastState .thunk .andThen( x -> { // Add the extra edges back in, if they don't break the tree-ness of the // extraction for (IndexedWord newTreeRoot : x.getRoots()) { if (newTreeRoot != null) { // what a strange thing to have happen... for (SemanticGraphEdge extraEdge : extraEdgesByGovernor.get(newTreeRoot)) { assert Util.isTree(x); //noinspection unchecked addSubtree( x, newTreeRoot, extraEdge.getRelation().toString(), tree, extraEdge.getDependent(), tree.getIncomingEdgesSorted(newTreeRoot)); assert Util.isTree(x); } } } }) .accept(copy); return new SentenceFragment(copy, assumedTruth, false); }))) { break; } } // Find relevant auxilliary terms SemanticGraphEdge subjOrNull = null; SemanticGraphEdge objOrNull = null; for (SemanticGraphEdge auxEdge : tree.outgoingEdgeIterable(rootWord)) { String relString = auxEdge.getRelation().toString(); if (relString.contains("obj")) { objOrNull = auxEdge; } else if (relString.contains("subj")) { subjOrNull = auxEdge; } } // Iterate over children // For each outgoing edge... for (SemanticGraphEdge outgoingEdge : tree.outgoingEdgeIterable(rootWord)) { // Prohibit indirect speech verbs from splitting off clauses // (e.g., 'said', 'think') // This fires if the governor is an indirect speech verb, and the outgoing edge is a ccomp if (outgoingEdge.getRelation().toString().equals("ccomp") && ((outgoingEdge.getGovernor().lemma() != null && INDIRECT_SPEECH_LEMMAS.contains(outgoingEdge.getGovernor().lemma())) || INDIRECT_SPEECH_LEMMAS.contains(outgoingEdge.getGovernor().word()))) { continue; } // Get some variables String outgoingEdgeRelation = outgoingEdge.getRelation().toString(); List<String> forcedArcOrder = hardCodedSplits.get(outgoingEdgeRelation); if (forcedArcOrder == null && outgoingEdgeRelation.contains(":")) { forcedArcOrder = hardCodedSplits.get( outgoingEdgeRelation.substring(0, outgoingEdgeRelation.indexOf(":")) + ":*"); } boolean doneForcedArc = false; // For each action... for (Action action : (forcedArcOrder == null ? actionSpace : orderActions(actionSpace, forcedArcOrder))) { // Check the prerequisite if (!action.prerequisitesMet(tree, outgoingEdge)) { continue; } if (forcedArcOrder != null && doneForcedArc) { break; } // 1. Compute the child state Optional<State> candidate = action.applyTo(tree, lastState, outgoingEdge, subjOrNull, objOrNull); if (candidate.isPresent()) { double logProbability; ClauseClassifierLabel bestLabel; Counter<String> features = featurizer.apply(Triple.makeTriple(lastState, action, candidate.get())); if (forcedArcOrder != null && !doneForcedArc) { logProbability = 0.0; bestLabel = ClauseClassifierLabel.CLAUSE_SPLIT; doneForcedArc = true; } else if (features.containsKey("__undocumented_junit_no_classifier")) { logProbability = Double.NEGATIVE_INFINITY; bestLabel = ClauseClassifierLabel.CLAUSE_INTERM; } else { Counter<ClauseClassifierLabel> scores = classifier.scoresOf(new RVFDatum<>(features)); if (scores.size() > 0) { Counters.logNormalizeInPlace(scores); } String rel = outgoingEdge.getRelation().toString(); if ("nsubj".equals(rel) || "dobj".equals(rel)) { scores.remove( ClauseClassifierLabel.NOT_A_CLAUSE); // Always at least yield on nsubj and dobj } logProbability = Counters.max(scores, Double.NEGATIVE_INFINITY); bestLabel = Counters.argmax(scores, (x, y) -> 0, ClauseClassifierLabel.CLAUSE_SPLIT); } if (bestLabel != ClauseClassifierLabel.NOT_A_CLAUSE) { Pair<State, List<Counter<String>>> childState = Pair.makePair( candidate.get().withIsDone(bestLabel), new ArrayList<Counter<String>>(featuresSoFar) { { add(features); } }); // 2. Register the child state if (!seenWords.contains(childState.first.edge.getDependent())) { // System.err.println(" pushing " + action.signature() + " with " + // argmax.first.edge); fringe.add(childState, logProbability); } } } } } seenWords.add(rootWord); } // System.err.println("Search finished in " + ticks + " ticks and " + classifierEvals + " // classifier evaluations."); }