protected double m1MaximumAlignment(TokenedString input, TokenedString gText) { int l = gText.size(); int m = input.size(); double prod = 1.; for (int k = 1; k <= m; k++) { String pWord = input.t(k); if (!this.prodDictionary.contains(pWord)) { continue; } // find best match double bestMatch = 0.; for (int j = 0; j <= l; j++) { double word = this.wp.prob(pWord, gText.t(j)); if (word > bestMatch) { bestMatch = word; } } prod *= bestMatch; } // normalize by how many possible alignments there are double prob = prod * Math.pow(l + 1, m); return prob; }
/**
 * Builds the alignment that pairs each input token with the generating-text position whose
 * word probability is highest, and prints each choice as {@code index(prob)} on one line of
 * standard output.
 *
 * @param input the observed token string; tokens are read at positions {@code 1..input.size()}
 * @param gText the candidate generating text; positions {@code 0..gText.size()} are considered
 * @return one entry per input token (list index {@code i - 1} for token {@code i}) holding the
 *         chosen generating position, or -1 if no position had a probability greater than -1
 */
protected List<Integer> m1mlAlignment(TokenedString input, TokenedString gText) {
    final int genLen = gText.size();
    final int inLen = input.size();
    final List<Integer> chosen = new ArrayList<Integer>(inLen);

    for (int inPos = 1; inPos <= inLen; inPos++) {
        final String producedWord = input.t(inPos);

        int bestPos = -1;
        double bestProb = -1.;
        for (int genPos = 0; genPos <= genLen; genPos++) {
            final String genWord = gText.t(genPos);
            final double candidate = this.wp.prob(producedWord, genWord);
            // strict comparison: the earliest position wins ties
            if (candidate > bestProb) {
                bestProb = candidate;
                bestPos = genPos;
            }
        }

        System.out.print(bestPos + "(" + bestProb + ") ");
        chosen.add(bestPos);
    }

    System.out.println("");
    return chosen;
}
protected double sampleMargAlign(TokenedString input, TokenedString gText, int nSamples) { int l = gText.size(); int m = input.size(); double alignMarg = 0.; for (int i = 0; i < nSamples; i++) { List<Integer> alignment = this.sampleAlignment(l, m); double prod = 1.; for (int k = 1; k <= alignment.size(); k++) { int ak = alignment.get(k - 1); // note that alignment array is in 0-base index String pWord = input.t(k); String gWord = gText.t(ak); if (!this.prodDictionary.contains(pWord)) { continue; } double dist = this.dp.prob(ak, k, l, m); double word = this.wp.prob(pWord, gWord); prod *= word; } alignMarg += prod; } alignMarg /= (double) nSamples; return alignMarg; }
protected double enumMargAlign(TokenedString input, TokenedString gText, double minDistProb) { int l = gText.size(); int m = input.size(); double alignMarg = 0.; // List <List<Integer>> allAlignments = getAllExAlignents(l, m); List<List<Integer>> allAlignments = this.getLikelyAlignments(l, m, minDistProb); // System.out.println(allAlignments.size()); for (List<Integer> alignment : allAlignments) { double prod = 1.; for (int k = 1; k <= alignment.size(); k++) { int ak = alignment.get(k - 1); // note that alignment array is in 0-base index String pWord = input.t(k); String gWord = gText.t(ak); double dist = this.dp.prob(ak, k, l, m); double word = this.wp.prob(pWord, gWord); prod *= dist * word; } alignMarg += prod; } return alignMarg; }
/**
 * Finds argMax_g N_{l_g,m} \sum_{a \in alignments} \prod_{k=1}^m d_{a_k,k,l,m} t{e_a_k,f_k}
 * over all entries of {@code allGeneratingText}.
 *
 * <p>For each candidate the alignment marginal is estimated with 1000 sampled alignments when
 * the length probability is positive, and with {@code m1MaximumAlignment} otherwise. Two
 * scores are tracked in parallel: one weighted by the length probability and one without it.
 * The length-weighted winner is returned when its normalized probability is greater than
 * zero; otherwise the winner of the unweighted scores is returned.
 *
 * @param input the observed token string to decode
 * @return the best candidate together with its score divided by the sum of all scores of the
 *         same kind
 */
public DecodeResult decode(TokenedString input) {
    final int inputLen = input.size();

    double weightedTotal = 0.;
    double unweightedTotal = 0.;
    double weightedBest = -1.;
    double unweightedBest = -1.;
    TokenedString weightedArgmax = null;
    TokenedString unweightedArgmax = null;

    for (TokenedString candidate : this.allGeneratingText) {
        final int candidateLen = candidate.size();
        final double lengthProb = this.lp.prob(candidateLen, inputLen);

        final double marginal = (lengthProb > 0)
                ? this.sampleMargAlign(input, candidate, 1000)
                : this.m1MaximumAlignment(input, candidate);

        // score without the length model
        final double unweighted = marginal * this.probGenSentences.get(candidate);
        unweightedTotal += unweighted;
        if (unweighted > unweightedBest) {
            unweightedBest = unweighted;
            unweightedArgmax = candidate;
        }

        // score including the length model
        final double weighted = unweighted * lengthProb;
        weightedTotal += weighted;
        if (weighted > weightedBest) {
            weightedBest = weighted;
            weightedArgmax = candidate;
        }
    }

    final double weightedProb = weightedBest / weightedTotal;
    if (weightedProb > 0.) {
        return new DecodeResult(input, weightedArgmax, weightedProb);
    }

    // no candidate received positive length-weighted mass: use the unweighted scores
    final double unweightedProb = unweightedBest / unweightedTotal;
    return new DecodeResult(input, unweightedArgmax, unweightedProb);
}
public List<DecodeResult> probDist(TokenedString input) { List<DecodeResult> result = new ArrayList<DecodeResult>(); List<DecodeResult> resultNN = new ArrayList<DecodeResult>(); int m = input.size(); double sum = 0.; double sumNN = 0.; for (TokenedString gText : this.allGeneratingText) { int l = gText.size(); double n = this.lp.prob(l, m); double alignMarg = 0.; if (n > 0) { alignMarg = this.sampleMargAlign(input, gText, 1000); } else { alignMarg = this.m1MaximumAlignment(input, gText); } double pG = alignMarg * this.probGenSentences.get(gText); sumNN += pG; DecodeResult drnn = new DecodeResult(input, gText, pG); resultNN.add(drnn); pG *= n; sum += pG; DecodeResult dr = new DecodeResult(input, gText, pG); result.add(dr); } // Normalize for (DecodeResult dr : result) { dr.prob /= sum; } for (DecodeResult drnn : resultNN) { drnn.prob /= sumNN; } if (sum > 0.) { return result; } else { return resultNN; } }
public void probDistMLProbe(TokenedString input) { System.out.println("Computing Distribution for: " + input.toString()); System.out.println("======================================================"); int m = input.size(); for (TokenedString gText : this.allGeneratingText) { double prior = this.probGenSentences.get(gText); System.out.println(prior + ": " + gText.toString()); int l = gText.size(); double n = this.lp.prob(l, m); System.out.println(n + ": p(" + m + "| " + l + ") length prob"); List<Integer> alignment = null; if (n > 0.) { alignment = this.mlAlignment(l, m); } else { alignment = this.m1mlAlignment(input, gText); } for (int k = 1; k <= alignment.size(); k++) { int ak = alignment.get(k - 1); // note that alignment array is in 0-base index String pWord = input.t(k); String gWord = gText.t(ak); if (!this.prodDictionary.contains(pWord)) { System.out.println("NA"); continue; } double word = this.wp.prob(pWord, gWord); System.out.println(word + ": p(" + pWord + " | " + gWord + ")"); } System.out.println("----------------------------------------------------------"); } System.out.println("**********************************************************\n\n"); }