@Test public void testCorp() { // We test a 2x2 design: {strict, regular} x {no following context, following context} for (int sent = 0; sent < 4; sent++) { PTBTokenizer<CoreLabel> ptbTokenizer = new PTBTokenizer<>( new StringReader(corpInputs[sent / 2]), new CoreLabelTokenFactory(), (sent % 2 == 0) ? "strictTreebank3" : ""); int i = 0; while (ptbTokenizer.hasNext()) { CoreLabel w = ptbTokenizer.next(); try { assertEquals("PTBTokenizer problem", corpGold[sent % 2][i], w.word()); } catch (ArrayIndexOutOfBoundsException aioobe) { // the assertion below outside the loop will fail } i++; } if (i != corpGold[sent % 2].length) { System.out.println("Gold: " + Arrays.toString(corpGold[sent % 2])); List<CoreLabel> tokens = new PTBTokenizer<>( new StringReader(corpInputs[sent / 2]), new CoreLabelTokenFactory(), (sent % 2 == 0) ? "strictTreebank3" : "") .tokenize(); System.out.println("Guess: " + SentenceUtils.listToString(tokens)); System.out.flush(); } assertEquals("PTBTokenizer num tokens problem", i, corpGold[sent % 2].length); } }