public SentenceStatistics mean() { double sumConfidence = 0; int countWithConfidence = 0; Counter<String> avePredictions = new ClassicCounter<>(MapFactory.<String, MutableDouble>linkedHashMapFactory()); // Sum for (SentenceStatistics stat : this.statisticsForClassifiers) { for (Double confidence : stat.confidence) { sumConfidence += confidence; countWithConfidence += 1; } assert Math.abs(stat.relationDistribution.totalCount() - 1.0) < 1e-5; for (Map.Entry<String, Double> entry : stat.relationDistribution.entrySet()) { assert entry.getValue() >= 0.0; assert entry.getValue() == stat.relationDistribution.getCount(entry.getKey()); avePredictions.incrementCount(entry.getKey(), entry.getValue()); assert stat.relationDistribution.getCount(entry.getKey()) == stat.relationDistribution.getCount(entry.getKey()); } } // Normalize double aveConfidence = sumConfidence / ((double) countWithConfidence); // Return if (this.statisticsForClassifiers.size() > 1) { Counters.divideInPlace(avePredictions, (double) this.statisticsForClassifiers.size()); } if (Math.abs(avePredictions.totalCount() - 1.0) > 1e-5) { throw new IllegalStateException("Mean relation distribution is not a distribution!"); } assert this.statisticsForClassifiers.size() > 1 || this.statisticsForClassifiers.size() == 0 || Counters.equals( avePredictions, statisticsForClassifiers.iterator().next().relationDistribution, 1e-5); return countWithConfidence > 0 ? new SentenceStatistics(avePredictions, aveConfidence) : new SentenceStatistics(avePredictions); }
/** * Search from the root of the tree. This function also defines the default action space to use * during search. This is NOT recommended to be used at test time. * * @see edu.stanford.nlp.naturalli.ClauseSplitterSearchProblem#search(Predicate) * @param candidateFragments The callback function. * @param classifier The classifier for whether an arc should be on the path to a clause split, a * clause split itself, or neither. * @param featurizer The featurizer to use during search, to be dot producted with the weights. */ public void search( // The output specs final Predicate<Triple<Double, List<Counter<String>>, Supplier<SentenceFragment>>> candidateFragments, // The learning specs final Classifier<ClauseSplitter.ClauseClassifierLabel, String> classifier, final Map<String, List<String>> hardCodedSplits, final Function<Triple<State, Action, State>, Counter<String>> featurizer, final int maxTicks) { Collection<Action> actionSpace = new ArrayList<>(); // SIMPLE SPLIT actionSpace.add( new Action() { @Override public String signature() { return "simple"; } @Override public boolean prerequisitesMet(SemanticGraph originalTree, SemanticGraphEdge edge) { char tag = edge.getDependent().tag().charAt(0); return !(tag != 'V' && tag != 'N' && tag != 'J' && tag != 'P' && tag != 'D'); } @Override public Optional<State> applyTo( SemanticGraph tree, State source, SemanticGraphEdge outgoingEdge, SemanticGraphEdge subjectOrNull, SemanticGraphEdge objectOrNull) { return Optional.of( new State( outgoingEdge, subjectOrNull == null ? source.subjectOrNull : subjectOrNull, subjectOrNull == null ? (source.distanceFromSubj + 1) : 0, objectOrNull == null ? source.objectOrNull : objectOrNull, source.thunk.andThen( toModify -> { assert Util.isTree(toModify); simpleClause(toModify, outgoingEdge); if (outgoingEdge.getRelation().toString().endsWith("comp")) { stripAuxMark(toModify); } assert Util.isTree(toModify); }), false)); } }); // CLONE ROOT actionSpace.add( new Action() { @Override public String signature() { return "clone_root_as_nsubjpass"; } @Override public boolean prerequisitesMet(SemanticGraph originalTree, SemanticGraphEdge edge) { // Only valid if there's a single nontrivial outgoing edge from a node. Otherwise it's a // whole can of worms. Iterator<SemanticGraphEdge> iter = originalTree.outgoingEdgeIterable(edge.getGovernor()).iterator(); if (!iter.hasNext()) { return false; // what? } boolean nontrivialEdge = false; while (iter.hasNext()) { SemanticGraphEdge outEdge = iter.next(); switch (outEdge.getRelation().toString()) { case "nn": case "amod": break; default: if (nontrivialEdge) { return false; } nontrivialEdge = true; } } return true; } @Override public Optional<State> applyTo( SemanticGraph tree, State source, SemanticGraphEdge outgoingEdge, SemanticGraphEdge subjectOrNull, SemanticGraphEdge objectOrNull) { return Optional.of( new State( outgoingEdge, subjectOrNull == null ? source.subjectOrNull : subjectOrNull, subjectOrNull == null ? (source.distanceFromSubj + 1) : 0, objectOrNull == null ? source.objectOrNull : objectOrNull, source.thunk.andThen( toModify -> { assert Util.isTree(toModify); simpleClause(toModify, outgoingEdge); addSubtree( toModify, outgoingEdge.getDependent(), "nsubjpass", tree, outgoingEdge.getGovernor(), Collections.singleton(outgoingEdge)); // addWord(toModify, outgoingEdge.getDependent(), "auxpass", // mockNode(outgoingEdge.getDependent().backingLabel(), "is", "VBZ")); assert Util.isTree(toModify); }), true)); } }); // COPY SUBJECT actionSpace.add( new Action() { @Override public String signature() { return "clone_nsubj"; } @Override public boolean prerequisitesMet(SemanticGraph originalTree, SemanticGraphEdge edge) { // Don't split into anything but verbs or nouns char tag = edge.getDependent().tag().charAt(0); if (tag != 'V' && tag != 'N') { return false; } for (SemanticGraphEdge grandchild : originalTree.outgoingEdgeIterable(edge.getDependent())) { if (grandchild.getRelation().toString().contains("subj")) { return false; } } return true; } @Override public Optional<State> applyTo( SemanticGraph tree, State source, SemanticGraphEdge outgoingEdge, SemanticGraphEdge subjectOrNull, SemanticGraphEdge objectOrNull) { if (subjectOrNull != null && !outgoingEdge.equals(subjectOrNull)) { return Optional.of( new State( outgoingEdge, subjectOrNull, 0, objectOrNull == null ? source.objectOrNull : objectOrNull, source.thunk.andThen( toModify -> { assert Util.isTree(toModify); simpleClause(toModify, outgoingEdge); addSubtree( toModify, outgoingEdge.getDependent(), "nsubj", tree, subjectOrNull.getDependent(), Collections.singleton(outgoingEdge)); assert Util.isTree(toModify); stripAuxMark(toModify); assert Util.isTree(toModify); }), false)); } else { return Optional.empty(); } } }); // COPY OBJECT actionSpace.add( new Action() { @Override public String signature() { return "clone_dobj"; } @Override public boolean prerequisitesMet(SemanticGraph originalTree, SemanticGraphEdge edge) { // Don't split into anything but verbs or nouns char tag = edge.getDependent().tag().charAt(0); if (tag != 'V' && tag != 'N') { return false; } for (SemanticGraphEdge grandchild : originalTree.outgoingEdgeIterable(edge.getDependent())) { if (grandchild.getRelation().toString().contains("subj")) { return false; } } return true; } @Override public Optional<State> applyTo( SemanticGraph tree, State source, SemanticGraphEdge outgoingEdge, SemanticGraphEdge subjectOrNull, SemanticGraphEdge objectOrNull) { if (objectOrNull != null && !outgoingEdge.equals(objectOrNull)) { return Optional.of( new State( outgoingEdge, subjectOrNull == null ? source.subjectOrNull : subjectOrNull, subjectOrNull == null ? (source.distanceFromSubj + 1) : 0, objectOrNull, source.thunk.andThen( toModify -> { assert Util.isTree(toModify); // Split the clause simpleClause(toModify, outgoingEdge); // Attach the new subject addSubtree( toModify, outgoingEdge.getDependent(), "nsubj", tree, objectOrNull.getDependent(), Collections.singleton(outgoingEdge)); // Strip bits we don't want assert Util.isTree(toModify); stripAuxMark(toModify); assert Util.isTree(toModify); }), false)); } else { return Optional.empty(); } } }); for (IndexedWord root : tree.getRoots()) { search( root, candidateFragments, classifier, hardCodedSplits, featurizer, actionSpace, maxTicks); } }
/** * A helper to add an entire subtree to a given dependency tree. * * @param toModify The tree to add the subtree to. * @param root The root of the tree where we should be adding the subtree. * @param rel The relation to add the subtree with. * @param originalTree The orignal tree (i.e., {@link ClauseSplitterSearchProblem#tree}). * @param subject The root of the clause to add. * @param ignoredEdges The edges to ignore adding when adding this subtree. */ private static void addSubtree( SemanticGraph toModify, IndexedWord root, String rel, SemanticGraph originalTree, IndexedWord subject, Collection<SemanticGraphEdge> ignoredEdges) { if (toModify.containsVertex(subject)) { return; // This subtree already exists. } Queue<IndexedWord> fringe = new LinkedList<>(); Collection<IndexedWord> wordsToAdd = new ArrayList<>(); Collection<SemanticGraphEdge> edgesToAdd = new ArrayList<>(); // Search for subtree to add for (SemanticGraphEdge edge : originalTree.outgoingEdgeIterable(subject)) { if (!ignoredEdges.contains(edge)) { if (toModify.containsVertex(edge.getDependent())) { // Case: we're adding a subtree that's not disjoint from toModify. This is bad news. return; } edgesToAdd.add(edge); fringe.add(edge.getDependent()); } } while (!fringe.isEmpty()) { IndexedWord node = fringe.poll(); wordsToAdd.add(node); for (SemanticGraphEdge edge : originalTree.outgoingEdgeIterable(node)) { if (!ignoredEdges.contains(edge)) { if (toModify.containsVertex(edge.getDependent())) { // Case: we're adding a subtree that's not disjoint from toModify. This is bad news. return; } edgesToAdd.add(edge); fringe.add(edge.getDependent()); } } } // Add subtree // (add subject) toModify.addVertex(subject); toModify.addEdge( root, subject, GrammaticalRelation.valueOf(Language.English, rel), Double.NEGATIVE_INFINITY, false); // (add nodes) wordsToAdd.forEach(toModify::addVertex); // (add edges) for (SemanticGraphEdge edge : edgesToAdd) { assert !toModify.incomingEdgeIterator(edge.getDependent()).hasNext(); toModify.addEdge( edge.getGovernor(), edge.getDependent(), edge.getRelation(), edge.getWeight(), edge.isExtra()); } }
public void addInPlace(SentenceStatistics stats) { statisticsForClassifiers.add(stats); }