/**
 * Prints a diagnostic dump of this hypothesis and of its parent derivation to
 * {@code System.err}, then checks that {@code isDone()} agrees with {@code featurizable.done}.
 *
 * <p>When the two values disagree, the mismatch details and every derivation reachable through
 * {@code preceedingDerivation} (starting with this one) are printed before the method fails.
 *
 * @throws RuntimeException if {@code isDone()} differs from {@code featurizable.done}
 */
@Override
public void debug() {
  System.err.println("###################");
  System.err.printf(
      "hypothesis [class=%s,id=%d,pos=%d,expired=%s,pending=%d]: %s\n",
      getClass(),
      System.identityHashCode(this),
      featurizable.targetPosition,
      hasExpired,
      pendingPhrases.size(),
      this);
  System.err.printf(
      "parent hypothesis [class=%s,id=%d,pos=%d,expired=%s]: %s\n",
      preceedingDerivation.getClass(),
      System.identityHashCode(preceedingDerivation),
      preceedingDerivation.featurizable.targetPosition,
      preceedingDerivation.hasExpired(),
      preceedingDerivation);
  System.err.println("pendingPhrasesCost: " + pendingPhrasesCost);

  if (isDone() != featurizable.done) {
    System.err.println("Error in AbstractBeamInferer with: " + this);
    System.err.println("isDone(): " + isDone());
    System.err.println("pending phrases: " + pendingPhrases.size());
    System.err.println("f.done: " + featurizable.done);

    // Walk back through the chain of derivations, starting with this one.
    for (Derivation<TK, FV> ancestor = this;
        ancestor != null;
        ancestor = ancestor.preceedingDerivation) {
      System.err.println(" " + ancestor.toString());
    }

    // Carry the disagreeing values in the exception so the failure is diagnosable
    // even when only the stack trace is available.
    throw new RuntimeException(
        "Inconsistent hypothesis state: isDone()="
            + isDone()
            + ", featurizable.done="
            + featurizable.done);
  }
}
private LispTree exampleToLispTree(ParserState state) { LispTree tree = LispTree.proto.newList(); tree.addChild("example"); // Basic information Example ex = state.ex; if (ex.id != null) tree.addChild(LispTree.proto.newList("id", ex.id)); if (ex.utterance != null) tree.addChild(LispTree.proto.newList("utterance", ex.utterance)); if (ex.targetFormula != null) tree.addChild(LispTree.proto.newList("targetFormula", ex.targetFormula.toLispTree())); if (ex.targetValue != null) tree.addChild(LispTree.proto.newList("targetValue", ex.targetValue.toLispTree())); if (ex.context != null) tree.addChild(ex.context.toLispTree()); // Language info if (ex.languageInfo != null) { if (ex.languageInfo.tokens != null) tree.addChild( LispTree.proto.newList("tokens", LispTree.proto.newList(ex.languageInfo.tokens))); if (ex.languageInfo.lemmaTokens != null) tree.addChild( LispTree.proto.newList( "lemmaTokens", LispTree.proto.newList(ex.languageInfo.lemmaTokens))); if (ex.languageInfo.posTags != null) tree.addChild( LispTree.proto.newList("posTags", LispTree.proto.newList(ex.languageInfo.posTags))); if (ex.languageInfo.nerTags != null) tree.addChild( LispTree.proto.newList("nerTags", LispTree.proto.newList(ex.languageInfo.nerTags))); if (ex.languageInfo.nerValues != null) tree.addChild( LispTree.proto.newList("nerValues", LispTree.proto.newList(ex.languageInfo.nerValues))); } // Derivations LispTree derivations = LispTree.proto.newList(); derivations.addChild("derivations"); List<Derivation> preds = state.predDerivations; for (int i = 0; i < preds.size(); i++) { Derivation deriv = preds.get(i); if (!isPruned(deriv)) { derivations.addChild(deriv.toLispTree()); } } tree.addChild(derivations); return tree; }
/** * Returns true if the two original variables are related in a way that makes subsequence or * subset detection not informative. */ public static boolean isObviousSubSequenceDynamically( Invariant inv, VarInfo subvar, VarInfo supervar) { VarInfo[] vis = {subvar, supervar}; ProglangType rep1 = subvar.rep_type; ProglangType rep2 = supervar.rep_type; if (!(((rep1 == ProglangType.INT_ARRAY) && (rep2 == ProglangType.INT_ARRAY)) || ((rep1 == ProglangType.DOUBLE_ARRAY) && (rep2 == ProglangType.DOUBLE_ARRAY)) || ((rep1 == ProglangType.STRING_ARRAY) && (rep2 == ProglangType.STRING_ARRAY)))) return false; if (debug.isLoggable(Level.FINE)) { debug.fine( "Checking isObviousSubSequenceDynamically " + subvar.name() + " in " + supervar.name()); } Object[] di = isObviousSubSequence(subvar, supervar); if (di[1] != null) { inv.log("ObvSubSeq- true from isObviousSubSequence: " + di[1]); return true; } debug.fine(" not isObviousSubSequence(statically)"); PptTopLevel ppt_parent = subvar.ppt; // If the elements of supervar are always the same (EltOneOf), // we aren't going to learn anything new from this invariant, // since each sequence should have an EltOneOf over it. 
if (false) { PptSlice1 slice = ppt_parent.findSlice(supervar); if (slice == null) { System.out.println("No slice: parent =" + ppt_parent); } else { System.out.println("Slice var =" + slice.var_infos[0]); for (Invariant superinv : slice.invs) { System.out.println("Inv = " + superinv); if (superinv instanceof EltOneOf) { EltOneOf eltinv = (EltOneOf) superinv; if (eltinv.num_elts() > 0) { inv.log(" obvious because of " + eltinv.format()); return true; } } } } } // Obvious if subvar is always just [] if (true) { PptSlice1 slice = ppt_parent.findSlice(subvar); if (slice != null) { for (Invariant subinv : slice.invs) { if (subinv instanceof OneOfSequence) { OneOfSequence seqinv = (OneOfSequence) subinv; if (seqinv.num_elts() == 1) { Object elt = seqinv.elt(); if (elt instanceof long[] && ((long[]) elt).length == 0) { Debug.log( debug, inv.getClass(), inv.ppt, vis, "ObvSubSeq- True from subvar being []"); return true; } if (elt instanceof double[] && ((double[]) elt).length == 0) { inv.log("ObvSubSeq- True from subvar being []"); return true; } } } } } } // Check for a[0..i] subseq a[0..j] but i < j. VarInfo subvar_super = subvar.isDerivedSubSequenceOf(); VarInfo supervar_super = supervar.isDerivedSubSequenceOf(); if (subvar_super != null && subvar_super == supervar_super) { // both sequences are derived from the same supersequence if ((subvar.derived instanceof SequenceScalarSubsequence || subvar.derived instanceof SequenceScalarArbitrarySubsequence) && (supervar.derived instanceof SequenceScalarSubsequence || supervar.derived instanceof SequenceScalarArbitrarySubsequence)) { VarInfo sub_left_var = null, sub_right_var = null, super_left_var = null, super_right_var = null; // I'm careful not to access foo_shift unless foo_var has been set // to a non-null value, but Java is too stupid to recognize that. 
int sub_left_shift = 42, sub_right_shift = 69, super_left_shift = 1492, super_right_shift = 1776; if (subvar.derived instanceof SequenceScalarSubsequence) { SequenceScalarSubsequence sub = (SequenceScalarSubsequence) subvar.derived; if (sub.from_start) { sub_right_var = sub.sclvar(); sub_right_shift = sub.index_shift; } else { sub_left_var = sub.sclvar(); sub_left_shift = sub.index_shift; } } else if (subvar.derived instanceof SequenceScalarArbitrarySubsequence) { SequenceScalarArbitrarySubsequence sub = (SequenceScalarArbitrarySubsequence) subvar.derived; sub_left_var = sub.startvar(); sub_left_shift = (sub.left_closed ? 0 : 1); sub_right_var = sub.endvar(); sub_right_shift = (sub.right_closed ? 0 : -1); } else { Assert.assertTrue(false); } if (supervar.derived instanceof SequenceScalarSubsequence) { SequenceScalarSubsequence super_ = (SequenceScalarSubsequence) supervar.derived; if (super_.from_start) { super_right_var = super_.sclvar(); super_right_shift = super_.index_shift; } else { super_left_var = super_.sclvar(); super_left_shift = super_.index_shift; } } else if (supervar.derived instanceof SequenceScalarArbitrarySubsequence) { SequenceScalarArbitrarySubsequence super_ = (SequenceScalarArbitrarySubsequence) supervar.derived; super_left_var = super_.startvar(); super_left_shift = (super_.left_closed ? 0 : 1); super_right_var = super_.endvar(); super_right_shift = (super_.right_closed ? 
0 : -1); } else { Assert.assertTrue(false); } boolean left_included, right_included; if (super_left_var == null) left_included = true; else if (sub_left_var == null) // we know super_left_var != null here left_included = false; else left_included = VarInfo.compare_vars( super_left_var, super_left_shift, sub_left_var, sub_left_shift, true /* <= */); if (super_right_var == null) right_included = true; else if (sub_right_var == null) // we know super_right_var != null here right_included = false; else right_included = VarInfo.compare_vars( super_right_var, super_right_shift, sub_right_var, sub_right_shift, false /* >= */); // System.out.println("Is " + subvar.name() + " contained in " // + supervar.name() // + "? left: " + left_included + ", right: " // + right_included); if (left_included && right_included) { inv.log("ObvSubSeq- True a[0..i] subseq a[0..j] and i < j"); return true; } } else if ((subvar.derived instanceof SequenceStringSubsequence) && (supervar.derived instanceof SequenceStringSubsequence)) { // Copied from just above SequenceStringSubsequence sss1 = (SequenceStringSubsequence) subvar.derived; SequenceStringSubsequence sss2 = (SequenceStringSubsequence) supervar.derived; VarInfo index1 = sss1.sclvar(); int shift1 = sss1.index_shift; boolean start1 = sss1.from_start; VarInfo index2 = sss2.sclvar(); int shift2 = sss2.index_shift; boolean start2 = sss2.from_start; if (start1 == start2) if (VarInfo.compare_vars(index1, shift1, index2, shift2, start1)) { inv.log("True from comparing indices"); return true; } } else { Assert.assertTrue( false, "how can this happen? " + subvar.name() + " " + subvar.derived.getClass() + " " + supervar.name() + " " + supervar.derived.getClass()); } } // Also need to check A[0..i] subseq A[0..j] via compare_vars. // A subseq B[0..n] => A subseq B List<Derivation> derivees = supervar.derivees(); // For each variable derived from supervar ("B") for (Derivation der : derivees) { // System.out.println(" ... 
der = " + der.getVarInfo().name() + " " + der); if (der instanceof SequenceScalarSubsequence) { // If that variable is "B[0..n]" VarInfo supervar_part = der.getVarInfo(); // Get the canonical version; being equal to it is good enough. if (supervar_part.get_equalitySet_leader() == subvar) { Debug.log(debug, inv.getClass(), inv.ppt, vis, "ObvSubSeq- True from canonical leader"); return true; } if (supervar_part.isCanonical()) { if (subvar == supervar_part) { System.err.println( "Error: variables " + subvar.name() + " and " + supervar_part.name() + " are identical. Canonical"); System.err.println(subvar.isCanonical()); System.err.println(supervar_part.isCanonical()); throw new Error(); } // Check to see if there is a subsequence over the supervar if (ppt_parent.is_subsequence(subvar, supervar_part)) { if (Debug.logOn()) inv.log( "ObvSubSeq- true from A subseq B[0..n] " + subvar.name() + "/" + supervar_part.name()); return (true); } } } } return false; }
/** Constructor used for 1st segment of a discontinuous phrase. */ public DTUHypothesis( int sourceInputId, ConcreteRule<TK, FV> translationOpt, int insertionPosition, Derivation<TK, FV> baseHyp, CombinedFeaturizer<TK, FV> featurizer, Scorer<FV> scorer, SearchHeuristic<TK, FV> heuristic) { super( sourceInputId, translationOpt, translationOpt.abstractRule, insertionPosition, baseHyp, featurizer, scorer, heuristic, /* * targetPhrase= */ getSegment(translationOpt.abstractRule, 0), /* hasPendingPhrases= */ hasPendingPhrases(translationOpt, baseHyp, true, false), /* segmentIdx= */ 0); // Copy old pending phrases from parent hypothesis: this.pendingPhrases = new TreeSet<PendingPhrase<TK, FV>>(); if (baseHyp instanceof DTUHypothesis) { Set<PendingPhrase<TK, FV>> oldPhrases = ((DTUHypothesis<TK, FV>) baseHyp).pendingPhrases; for (PendingPhrase<TK, FV> oldPhrase : oldPhrases) { this.pendingPhrases.add(new PendingPhrase<TK, FV>(oldPhrase)); int lastPosition = oldPhrase.lastPosition; if (lastPosition < this.length) this.hasExpired = true; } } // First segment of a discontinuous phrase has both source and target: this.segmentIdx = 0; // If parent hypothesis has expired, so does the current: if (baseHyp.hasExpired()) this.hasExpired = true; // Add new pending phrases: // assert (MAX_TARGET_PHRASE_SPAN >= 0); if (translationOpt.abstractRule instanceof DTURule) { PendingPhrase<TK, FV> newPhrase = new PendingPhrase<TK, FV>( translationOpt, sourceInputId, this, featurizer, scorer, 0, this.length + MIN_GAP_SIZE, this.length + MAX_TARGET_PHRASE_SPAN); pendingPhrases.add(newPhrase); } // Too many pending phrases?: if (pendingPhrases.size() > MAX_PENDING_PHRASES) this.hasExpired = true; // Estimate future cost for pending phrases: pendingPhrasesCost = costPendingPhrases(); checkExpiration(); }