コード例 #1
0
 /**
  * Reads word alignments from a whitespace-separated text file.
  *
  * <p>Every line must contain exactly four fields: sentence ID, English position,
  * French position and an alignment type. Positions are decremented by one before
  * being stored. The boolean passed to {@code addAlignment} is {@code true} exactly
  * when the type field equals {@code "S"}.
  *
  * @param fileName path of the alignment file to read
  * @return map from sentence ID to the alignment accumulated for that sentence
  * @throws RuntimeException if a line does not have four fields, or wrapping the
  *         {@link IOException} if the file cannot be read
  * @throws NumberFormatException if one of the first three fields is not an integer
  */
 private static Map<Integer, Alignment> readAlignments(String fileName) {
   Map<Integer,Alignment> alignments = new HashMap<Integer, Alignment>();
   // try-with-resources closes the reader on every exit path, including the
   // bad-line exception thrown from inside the loop.
   try (BufferedReader in = new BufferedReader(new FileReader(fileName))) {
     String line;
     // A null return from readLine() is the end-of-stream signal; ready() only
     // reports whether a read would block, so it is not a safe loop condition.
     while ((line = in.readLine()) != null) {
       String[] words = line.split("\\s+");
       if (words.length != 4)
         throw new RuntimeException("Bad alignment file "+fileName+", bad line was "+line);
       Integer sentenceID = Integer.parseInt(words[0]);
       // Shift the file's positions down by one before storing them.
       Integer englishPosition = Integer.parseInt(words[1])-1;
       Integer frenchPosition = Integer.parseInt(words[2])-1;
       String type = words[3];
       // Lazily create one Alignment per sentence ID and accumulate into it.
       Alignment alignment = alignments.get(sentenceID);
       if (alignment == null) {
         alignment = new Alignment();
         alignments.put(sentenceID, alignment);
       }
       alignment.addAlignment(englishPosition, frenchPosition, type.equals("S"));
     }
   } catch (IOException e) {
     throw new RuntimeException(e);
   }
   return alignments;
 }
コード例 #2
0
 /**
  * Reads parallel English/French sentences from two line-aligned files.
  *
  * <p>The files are {@code baseFileName + "." + ENGLISH_EXTENSION} and
  * {@code baseFileName + "." + FRENCH_EXTENSION}. Lines are consumed in lockstep and
  * reading stops as soon as either file is exhausted. Each line is parsed with
  * {@code readSentence}, and the two IDs on corresponding lines must be equal.
  *
  * @param baseFileName path of the file pair without the language extension
  * @return the sentence pairs in file order
  * @throws RuntimeException if corresponding lines carry different sentence IDs, or
  *         wrapping the {@link IOException} if either file cannot be read
  */
 private static List<SentencePair> readSentencePairs(String baseFileName) {
   List<SentencePair> sentencePairs = new ArrayList<SentencePair>();
   String englishFileName = baseFileName + "." + ENGLISH_EXTENSION;
   String frenchFileName = baseFileName + "." + FRENCH_EXTENSION;
   // Both readers are closed on every exit path, including the ID-mismatch throw.
   // The French file is decoded explicitly as ISO-8859-1.
   // NOTE(review): the English file still uses the platform default charset via
   // FileReader; confirm whether it should also be given an explicit charset.
   try (BufferedReader englishIn = new BufferedReader(new FileReader(englishFileName));
        BufferedReader frenchIn = new BufferedReader(new InputStreamReader(
            new FileInputStream(frenchFileName), StandardCharsets.ISO_8859_1))) {
     while (true) {
       // A null return from readLine() marks end of stream; ready() only says
       // whether a read would block, so it is not used as the loop condition.
       String englishLine = englishIn.readLine();
       if (englishLine == null)
         break;
       String frenchLine = frenchIn.readLine();
       if (frenchLine == null)
         break;
       Pair<Integer,List<String>> englishSentenceAndID = readSentence(englishLine);
       Pair<Integer,List<String>> frenchSentenceAndID = readSentence(frenchLine);
       if (! englishSentenceAndID.getFirst().equals(frenchSentenceAndID.getFirst()))
         throw new RuntimeException("Sentence ID confusion in file "+baseFileName+", lines were:\n\t"+englishLine+"\n\t"+frenchLine);
       sentencePairs.add(new SentencePair(englishSentenceAndID.getFirst(), baseFileName, englishSentenceAndID.getSecond(), frenchSentenceAndID.getSecond()));
     }
   } catch (IOException e) {
     throw new RuntimeException(e);
   }
   return sentencePairs;
 }