示例#1
0
 @Test
 public void testCorp() {
   // We test a 2x2 design: {strict, regular} x {no following context, following context}
   for (int sent = 0; sent < 4; sent++) {
     PTBTokenizer<CoreLabel> ptbTokenizer =
         new PTBTokenizer<>(
             new StringReader(corpInputs[sent / 2]),
             new CoreLabelTokenFactory(),
             (sent % 2 == 0) ? "strictTreebank3" : "");
     int i = 0;
     while (ptbTokenizer.hasNext()) {
       CoreLabel w = ptbTokenizer.next();
       try {
         assertEquals("PTBTokenizer problem", corpGold[sent % 2][i], w.word());
       } catch (ArrayIndexOutOfBoundsException aioobe) {
         // the assertion below outside the loop will fail
       }
       i++;
     }
     if (i != corpGold[sent % 2].length) {
       System.out.println("Gold: " + Arrays.toString(corpGold[sent % 2]));
       List<CoreLabel> tokens =
           new PTBTokenizer<>(
                   new StringReader(corpInputs[sent / 2]),
                   new CoreLabelTokenFactory(),
                   (sent % 2 == 0) ? "strictTreebank3" : "")
               .tokenize();
       System.out.println("Guess: " + SentenceUtils.listToString(tokens));
       System.out.flush();
     }
     assertEquals("PTBTokenizer num tokens problem", i, corpGold[sent % 2].length);
   }
 }