private static Map<Integer, Alignment> readAlignments(String fileName) { Map<Integer,Alignment> alignments = new HashMap<Integer, Alignment>(); try { BufferedReader in = new BufferedReader(new FileReader(fileName)); while (in.ready()) { String line = in.readLine(); String[] words = line.split("\\s+"); if (words.length != 4) throw new RuntimeException("Bad alignment file "+fileName+", bad line was "+line); Integer sentenceID = Integer.parseInt(words[0]); Integer englishPosition = Integer.parseInt(words[1])-1; Integer frenchPosition = Integer.parseInt(words[2])-1; String type = words[3]; Alignment alignment = alignments.get(sentenceID); if (alignment == null) { alignment = new Alignment(); alignments.put(sentenceID, alignment); } alignment.addAlignment(englishPosition, frenchPosition, type.equals("S")); } } catch (IOException e) { throw new RuntimeException(e); } return alignments; }
public Alignment alignSentencePair(SentencePair sentencePair) { Alignment alignment = new Alignment(); List<String> frenchWords = sentencePair.getFrenchWords(); List<String> englishWords = sentencePair.getEnglishWords(); int numFrenchWords = frenchWords.size(); int numEnglishWords = englishWords.size(); // Model 1 assumes all alignments are equally likely // So we can just take the argMax of t(f|e) to get the englishMaxPosition for (int frenchPosition = 0; frenchPosition < numFrenchWords; frenchPosition++) { String f = frenchWords.get(frenchPosition); int englishMaxPosition = -1; double maxTranslationProb = translationProbs.getCount(f, NULL); for (int englishPosition = 0; englishPosition < numEnglishWords; englishPosition++) { String e = englishWords.get(englishPosition); double translationProb = translationProbs.getCount(f, e); if (translationProb > maxTranslationProb) { maxTranslationProb = translationProb; englishMaxPosition = englishPosition; } } alignment.addAlignment(englishMaxPosition, frenchPosition, true); } return alignment; }
public Alignment alignSentencePair(SentencePair sentencePair) { Alignment alignment = new Alignment(); List<String> frenchWords = sentencePair.getFrenchWords(); List<String> englishWords = sentencePair.getEnglishWords(); int numFrenchWords = frenchWords.size(); int numEnglishWords = englishWords.size(); for (int frenchPosition = 0; frenchPosition < numFrenchWords; frenchPosition++) { String f = frenchWords.get(frenchPosition); int englishMaxPosition = frenchPosition; if (englishMaxPosition >= numEnglishWords) englishMaxPosition = -1; // map French word to BASELINE if c(f,e) = 0 for all English words double maxConditionalProb = 0; for (int englishPosition = 0; englishPosition < numEnglishWords; englishPosition++) { String e = englishWords.get(englishPosition); double conditionalGivenEnglish = collocationCounts.getCount(f, e) / (eCounts.getCount(e)); if (conditionalGivenEnglish > maxConditionalProb) { maxConditionalProb = conditionalGivenEnglish; englishMaxPosition = englishPosition; } } alignment.addAlignment(englishMaxPosition, frenchPosition, true); } return alignment; }
public Alignment alignSentencePair(SentencePair sentencePair) { Alignment alignment = new Alignment(); List<String> frenchWords = sentencePair.getFrenchWords(); List<String> englishWords = sentencePair.getEnglishWords(); int numFrenchWords = frenchWords.size(); int numEnglishWords = englishWords.size(); for (int frenchPosition = 0; frenchPosition < numFrenchWords; frenchPosition++) { String f = frenchWords.get(frenchPosition); int englishMaxPosition = frenchPosition; if (englishMaxPosition >= numEnglishWords) englishMaxPosition = -1; // map French word to BASELINE if c(f,e) = 0 for all English words double maxDice = 0; for (int englishPosition = 0; englishPosition < numEnglishWords; englishPosition++) { String e = englishWords.get(englishPosition); double dice = getDiceCoefficient(f,e); if (dice > maxDice) { maxDice = dice; englishMaxPosition = englishPosition; } } alignment.addAlignment(englishMaxPosition, frenchPosition, true); } return alignment; }
public Alignment alignSentencePair(SentencePair sentencePair) { Alignment alignment = new Alignment(); int numFrenchWords = sentencePair.getFrenchWords().size(); int numEnglishWords = sentencePair.getEnglishWords().size(); for (int frenchPosition = 0; frenchPosition < numFrenchWords; frenchPosition++) { int englishPosition = frenchPosition; if (englishPosition >= numEnglishWords) englishPosition = -1; alignment.addAlignment(englishPosition, frenchPosition, true); } return alignment; }