protected void addPhraseToIndex( WordAlignment sent, int f1, int f2, int e1, int e2, boolean isConsistent, float weight) { if (onlyTightPhrases) { if (sent.f2e(f1).isEmpty() || sent.f2e(f2).isEmpty() || sent.e2f(e1).isEmpty() || sent.e2f(e2).isEmpty()) return; } if (ignore(sent, f1, f2, e1, e2)) return; // Check if alTemp meets length requirements: if (f2 - f1 >= maxExtractedPhraseLenF || e2 - e1 >= maxExtractedPhraseLenE) { if (isConsistent) { alGrid.addAlTemp(f1, f2, e1, e2); } if (DETAILED_DEBUG) System.err.printf("skipping too long: %d %d\n", f2 - f1 + 1, e2 - e1 + 1); return; } // Create alTemp: AlignmentTemplateInstance alTemp; alTemp = new AlignmentTemplateInstance(sent, f1, f2, e1, e2, weight); alGrid.addAlTemp(alTemp, isConsistent); alTemps.addToIndex(alTemp); alTemps.incrementAlignmentCount(alTemp); }
protected void featurize(WordAlignment sent) { int fsize = sent.f().size(); int esize = sent.e().size(); // Features are extracted only once all phrases for a given // sentence pair are in memory for (AbstractFeatureExtractor e : extractors) { for (AlignmentTemplateInstance alTemp : alGrid.getAlTemps()) { e.featurizePhrase(alTemp, alGrid); if (PRINT_PHRASAL_GRID && fsize < PRINT_GRID_MAX_LEN && esize < PRINT_GRID_MAX_LEN) alGrid.printAlTempInGrid("phrase id: " + alTemp.getKey(), alTemp, System.err); } } }