예제 #1
0
  /**
   * Scores {@code input} against {@code gText} using only the single best word match per
   * input token (an IBM-Model-1 style "maximum alignment" score).
   *
   * <p>For every input token at positions {@code 1..m} that is contained in
   * {@code prodDictionary}, the largest {@code wp.prob(token, gText.t(j))} over
   * {@code j = 0..l} is taken, and these maxima are multiplied together. Tokens that are not
   * in {@code prodDictionary} are skipped and therefore contribute a factor of 1. A token
   * whose best match is not greater than 0 drives the whole product to 0.
   *
   * <p>The product is then divided by {@code (l + 1)^m}, the number of possible alignments of
   * {@code m} input positions onto {@code l + 1} generating positions. The previous
   * implementation multiplied by this count, which contradicted its own "normalize" comment
   * and inflated the score for longer generating texts.
   *
   * @param input the observed token sequence; its size is {@code m}
   * @param gText the candidate generating token sequence; its size is {@code l}. Position 0 is
   *     also queried (presumably the null/empty token - confirm against {@code TokenedString})
   * @return the product of per-token best word probabilities divided by {@code (l + 1)^m}
   */
  protected double m1MaximumAlignment(TokenedString input, TokenedString gText) {

    int l = gText.size();
    int m = input.size();

    double prod = 1.;
    for (int k = 1; k <= m; k++) {

      String pWord = input.t(k);
      if (!this.prodDictionary.contains(pWord)) {
        // unknown input words carry no information; leave the product untouched
        continue;
      }

      // find the best matching generating token, including position 0
      double bestMatch = 0.;
      for (int j = 0; j <= l; j++) {
        double word = this.wp.prob(pWord, gText.t(j));
        if (word > bestMatch) {
          bestMatch = word;
        }
      }

      prod *= bestMatch;
    }

    // normalize by how many possible alignments there are: (l + 1)^m
    return prod / Math.pow(l + 1, m);
  }
예제 #2
0
  /**
   * Builds the alignment that, independently for each input token, picks the position of
   * {@code gText} with the highest word probability.
   *
   * <p>Input positions {@code 1..m} are visited in order; for each one, generating positions
   * {@code 0..l} are scanned and the first position with the strictly largest
   * {@code wp.prob(inputToken, generatingToken)} wins. If no probability exceeds {@code -1},
   * the recorded position is {@code -1}.
   *
   * <p>As a side effect, each chosen position and its probability are printed to
   * {@code System.out} on a single line, followed by a line break.
   *
   * @param input the observed token sequence
   * @param gText the candidate generating token sequence
   * @return one chosen generating position per input token, in input order
   */
  protected List<Integer> m1mlAlignment(TokenedString input, TokenedString gText) {

    final int genLength = gText.size();
    final int inputLength = input.size();
    final List<Integer> chosen = new ArrayList<Integer>(inputLength);

    for (int inPos = 1; inPos <= inputLength; inPos++) {
      final String inToken = input.t(inPos);

      int bestPos = -1;
      double bestProb = -1.;
      for (int genPos = 0; genPos <= genLength; genPos++) {
        final double candidate = this.wp.prob(inToken, gText.t(genPos));
        if (candidate > bestProb) {
          bestProb = candidate;
          bestPos = genPos;
        }
      }

      System.out.print(bestPos + "(" + bestProb + ") ");
      chosen.add(bestPos);
    }
    System.out.println("");

    return chosen;
  }
예제 #3
0
  /**
   * Estimates the alignment-marginalized word likelihood of {@code input} given
   * {@code gText} by averaging over sampled alignments.
   *
   * <p>{@code nSamples} alignments are drawn with {@code sampleAlignment(l, m)}. For each one,
   * the word probabilities {@code wp.prob(inputToken, alignedGeneratingToken)} are multiplied
   * over all input positions whose token is contained in {@code prodDictionary}; other
   * positions are skipped. The per-sample products are then averaged.
   *
   * <p>No distortion probability is multiplied in here. The previous implementation looked it
   * up for every token but never used the value, so that dead lookup has been removed.
   * (Presumably {@code sampleAlignment} already draws alignments according to the distortion
   * model - confirm against its implementation.)
   *
   * @param input the observed token sequence; its size is {@code m}
   * @param gText the candidate generating token sequence; its size is {@code l}
   * @param nSamples number of alignments to sample; when this is 0 the result is {@code NaN}
   *     because the accumulated sum 0 is divided by 0
   * @return the mean over samples of the product of word probabilities
   */
  protected double sampleMargAlign(TokenedString input, TokenedString gText, int nSamples) {

    int l = gText.size();
    int m = input.size();

    double alignMarg = 0.;
    for (int i = 0; i < nSamples; i++) {
      List<Integer> alignment = this.sampleAlignment(l, m);
      double prod = 1.;
      for (int k = 1; k <= alignment.size(); k++) {
        String pWord = input.t(k);
        if (!this.prodDictionary.contains(pWord)) {
          // unknown input words contribute a factor of 1
          continue;
        }

        int ak = alignment.get(k - 1); // note that alignment array is in 0-base index
        prod *= this.wp.prob(pWord, gText.t(ak));
      }
      alignMarg += prod;
    }

    alignMarg /= (double) nSamples;

    return alignMarg;
  }
예제 #4
0
  /**
   * Sums, over an enumerated set of alignments, the product of distortion and word
   * probabilities for {@code input} given {@code gText}.
   *
   * <p>The alignments come from {@code getLikelyAlignments(l, m, minDistProb)}. For each
   * alignment, every input position {@code k} contributes
   * {@code dp.prob(a_k, k, l, m) * wp.prob(inputToken_k, generatingToken_{a_k})}, and the
   * per-alignment products are added together. Unlike the sampling variant, no tokens are
   * skipped here.
   *
   * @param input the observed token sequence; its size is {@code m}
   * @param gText the candidate generating token sequence; its size is {@code l}
   * @param minDistProb threshold forwarded unchanged to {@code getLikelyAlignments}
   * @return the sum of per-alignment probability products
   */
  protected double enumMargAlign(TokenedString input, TokenedString gText, double minDistProb) {

    final int l = gText.size();
    final int m = input.size();

    double total = 0.;
    for (List<Integer> candidate : this.getLikelyAlignments(l, m, minDistProb)) {

      double score = 1.;
      int k = 0;
      for (int ak : candidate) {
        // the alignment list is 0-based while token positions are 1-based
        k++;
        final String pWord = input.t(k);
        final String gWord = gText.t(ak);

        final double distortion = this.dp.prob(ak, k, l, m);
        final double lexical = this.wp.prob(pWord, gWord);

        score = score * (distortion * lexical);
      }

      total += score;
    }

    return total;
  }
예제 #5
0
  /**
   * Finds argMax_g N_{l_g,m} \sum_{a \in alignments} \prod_{k=1}^m d_{a_k,k,l,m} t{e_a_k,f_k}
   *
   * <p>Every entry of {@code allGeneratingText} is scored twice:
   *
   * <ul>
   *   <li>without the length term: alignment score times {@code probGenSentences.get(g)};
   *   <li>with the length term: the above times {@code lp.prob(l, m)}.
   * </ul>
   *
   * The alignment score is {@code sampleMargAlign(input, g, 1000)} when the length
   * probability is positive and {@code m1MaximumAlignment(input, g)} otherwise. The best
   * candidate under each scoring is tracked along with the sum of all scores.
   *
   * @param input the observed token sequence to decode
   * @return the best length-weighted candidate with its score divided by the length-weighted
   *     total when that ratio is positive; otherwise the best candidate and normalized score
   *     computed without the length term
   */
  public DecodeResult decode(TokenedString input) {

    final int m = input.size();

    double total = 0.;
    double totalNoLength = 0.;
    double bestScore = -1.;
    double bestScoreNoLength = -1.;
    TokenedString best = null;
    TokenedString bestNoLength = null;

    for (TokenedString candidate : this.allGeneratingText) {

      final int l = candidate.size();
      final double lengthProb = this.lp.prob(l, m);

      // fall back to the best-match score when the length model rules this candidate out
      final double alignScore =
          (lengthProb > 0)
              ? this.sampleMargAlign(input, candidate, 1000)
              : this.m1MaximumAlignment(input, candidate);

      final double scoreNoLength = alignScore * this.probGenSentences.get(candidate);
      totalNoLength += scoreNoLength;
      if (scoreNoLength > bestScoreNoLength) {
        bestScoreNoLength = scoreNoLength;
        bestNoLength = candidate;
      }

      final double score = scoreNoLength * lengthProb;
      total += score;
      if (score > bestScore) {
        bestScore = score;
        best = candidate;
      }
    }

    final double normalized = bestScore / total;
    if (normalized > 0.) {
      return new DecodeResult(input, best, normalized);
    }

    // no candidate got positive length-weighted mass; report the length-free result instead
    final double normalizedNoLength = bestScoreNoLength / totalNoLength;
    return new DecodeResult(input, bestNoLength, normalizedNoLength);
  }
예제 #6
0
  /**
   * Computes a normalized score for every entry of {@code allGeneratingText} given
   * {@code input}.
   *
   * <p>Two parallel result lists are built in iteration order: one whose scores include the
   * length probability {@code lp.prob(l, m)} and one whose scores omit it. Each candidate's
   * alignment score is {@code sampleMargAlign(input, g, 1000)} when the length probability is
   * positive and {@code m1MaximumAlignment(input, g)} otherwise, and is multiplied by
   * {@code probGenSentences.get(g)}. Both lists are divided through by their own totals.
   *
   * @param input the observed token sequence
   * @return the length-weighted list when its total is positive; otherwise the list computed
   *     without the length term
   */
  public List<DecodeResult> probDist(TokenedString input) {

    final List<DecodeResult> weighted = new ArrayList<DecodeResult>();
    final List<DecodeResult> unweighted = new ArrayList<DecodeResult>();

    final int m = input.size();
    double weightedTotal = 0.;
    double unweightedTotal = 0.;

    for (TokenedString candidate : this.allGeneratingText) {

      final int l = candidate.size();
      final double lengthProb = this.lp.prob(l, m);

      final double alignScore =
          (lengthProb > 0)
              ? this.sampleMargAlign(input, candidate, 1000)
              : this.m1MaximumAlignment(input, candidate);

      final double scoreNoLength = alignScore * this.probGenSentences.get(candidate);
      unweightedTotal += scoreNoLength;
      unweighted.add(new DecodeResult(input, candidate, scoreNoLength));

      final double score = scoreNoLength * lengthProb;
      weightedTotal += score;
      weighted.add(new DecodeResult(input, candidate, score));
    }

    // Normalize each list by its own total
    for (DecodeResult entry : weighted) {
      entry.prob = entry.prob / weightedTotal;
    }
    for (DecodeResult entry : unweighted) {
      entry.prob = entry.prob / unweightedTotal;
    }

    return (weightedTotal > 0.) ? weighted : unweighted;
  }
예제 #7
0
  /**
   * Diagnostic probe that prints, for every entry of {@code allGeneratingText}, the pieces
   * that go into scoring it against {@code input}.
   *
   * <p>For each candidate this writes to {@code System.out}: its value from
   * {@code probGenSentences}, the length probability {@code lp.prob(l, m)}, and then one line
   * per aligned input token giving {@code wp.prob(inputToken, alignedToken)}, or {@code NA}
   * when the input token is not in {@code prodDictionary}. The alignment used is
   * {@code mlAlignment(l, m)} when the length probability is positive and
   * {@code m1mlAlignment(input, candidate)} otherwise. Nothing is returned.
   *
   * @param input the observed token sequence to probe
   */
  public void probDistMLProbe(TokenedString input) {

    System.out.println("Computing Distribution for: " + input.toString());
    System.out.println("======================================================");

    final int m = input.size();
    for (TokenedString candidate : this.allGeneratingText) {

      final double prior = this.probGenSentences.get(candidate);
      System.out.println(prior + ": " + candidate.toString());
      final int l = candidate.size();

      final double lengthProb = this.lp.prob(l, m);
      System.out.println(lengthProb + ": p(" + m + "| " + l + ")    length prob");

      final List<Integer> alignment =
          (lengthProb > 0.) ? this.mlAlignment(l, m) : this.m1mlAlignment(input, candidate);

      for (int k = 1; k <= alignment.size(); k++) {
        // the alignment list is 0-based while token positions are 1-based
        final int alignedPos = alignment.get(k - 1);
        final String inToken = input.t(k);
        final String genToken = candidate.t(alignedPos);

        if (this.prodDictionary.contains(inToken)) {
          final double wordProb = this.wp.prob(inToken, genToken);
          System.out.println(wordProb + ": p(" + inToken + " | " + genToken + ")");
        } else {
          System.out.println("NA");
        }
      }

      System.out.println("----------------------------------------------------------");
    }

    System.out.println("**********************************************************\n\n");
  }