Ejemplo n.º 1
0
  /**
   * Creates an alignment template instance for the span pair ({@code f1..f2}, {@code e1..e2}) and
   * records it in both {@code alGrid} and the {@code alTemps} index, unless a filter rejects it.
   *
   * <p>Nothing is recorded when {@code onlyTightPhrases} is set and any of the four boundary
   * lookups ({@code f2e(f1)}, {@code f2e(f2)}, {@code e2f(e1)}, {@code e2f(e2)}) is empty, or when
   * {@code ignore(...)} returns true. A span that exceeds either length limit is never indexed; it
   * is only passed to {@code alGrid.addAlTemp(f1, f2, e1, e2)}, and only if {@code isConsistent}.
   *
   * @param sent word-aligned sentence pair the span is taken from
   * @param f1 start position of the span on the f side
   * @param f2 end position of the span on the f side
   * @param e1 start position of the span on the e side
   * @param e2 end position of the span on the e side
   * @param isConsistent forwarded to {@code alGrid}; also decides whether an over-long span is
   *     still registered with the grid
   * @param weight forwarded to the {@code AlignmentTemplateInstance} constructor
   */
  protected void addPhraseToIndex(
      WordAlignment sent, int f1, int f2, int e1, int e2, boolean isConsistent, float weight) {

    if (onlyTightPhrases) {
      // Lookups stay short-circuited, in the same order: f1, f2, e1, e2.
      final boolean unalignedBoundary =
          sent.f2e(f1).isEmpty()
              || sent.f2e(f2).isEmpty()
              || sent.e2f(e1).isEmpty()
              || sent.e2f(e2).isEmpty();
      if (unalignedBoundary) {
        return;
      }
    }

    if (ignore(sent, f1, f2, e1, e2)) {
      return;
    }

    // Length filter: spans are compared as (end - start), i.e. length minus one.
    final int fSpan = f2 - f1;
    final int eSpan = e2 - e1;
    final boolean tooLong = fSpan >= maxExtractedPhraseLenF || eSpan >= maxExtractedPhraseLenE;
    if (tooLong) {
      if (isConsistent) {
        alGrid.addAlTemp(f1, f2, e1, e2);
      }
      if (DETAILED_DEBUG) {
        System.err.printf("skipping too long: %d %d\n", fSpan + 1, eSpan + 1);
      }
      return;
    }

    // Span passed every filter: build the instance and register it everywhere.
    final AlignmentTemplateInstance instance =
        new AlignmentTemplateInstance(sent, f1, f2, e1, e2, weight);
    alGrid.addAlTemp(instance, isConsistent);

    alTemps.addToIndex(instance);
    alTemps.incrementAlignmentCount(instance);
  }
Ejemplo n.º 2
0
 /**
  * Runs every registered feature extractor over every alignment template instance currently
  * held by {@code alGrid}.
  *
  * <p>When {@code PRINT_PHRASAL_GRID} is enabled and both sides of the sentence pair are shorter
  * than {@code PRINT_GRID_MAX_LEN}, each instance is also printed in the grid to
  * {@code System.err} right after it has been featurized.
  *
  * @param sent word-aligned sentence pair; here only its two side lengths are read
  */
 protected void featurize(WordAlignment sent) {
   final int fLen = sent.f().size();
   final int eLen = sent.e().size();

   // Per the original note: featurization happens only after all phrases of the
   // sentence pair have been collected in memory.
   for (AbstractFeatureExtractor extractor : extractors) {
     for (AlignmentTemplateInstance instance : alGrid.getAlTemps()) {
       extractor.featurizePhrase(instance, alGrid);

       // Debug printing is skipped unless enabled and the sentence pair is small enough.
       if (!PRINT_PHRASAL_GRID) {
         continue;
       }
       if (fLen >= PRINT_GRID_MAX_LEN || eLen >= PRINT_GRID_MAX_LEN) {
         continue;
       }
       alGrid.printAlTempInGrid("phrase id: " + instance.getKey(), instance, System.err);
     }
   }
 }