/**
 * Runs the given tokens through the wrapper using the given model and collects
 * what the handler callbacks report, one string per recorded callback.
 *
 * <p>The test is guarded by an assumption that the {@code TREETAGGER_HOME}
 * environment variable is set. Tracing is switched on via the static
 * {@code TreeTaggerWrapper.TRACE} flag, and the wrapper is always destroyed
 * before this method returns or throws.
 *
 * @param aWrapper the wrapper to configure, run and finally destroy
 * @param aModel the model name handed to {@code setModel}
 * @param aTokens the tokens handed to {@code process}
 * @return the collected lines: {@code "token pos lemma"} for token callbacks
 *     (only while the wrapper reports no probability threshold) and
 *     {@code "token pos lemma probability"} for probability callbacks
 * @throws IOException propagated from the wrapper
 * @throws TreeTaggerException propagated from the wrapper
 */
private List<String> run(
        final TreeTaggerWrapper<String> aWrapper,
        final String aModel,
        final String... aTokens)
        throws IOException, TreeTaggerException {
    Assume.assumeTrue(System.getenv("TREETAGGER_HOME") != null);
    TreeTaggerWrapper.TRACE = true;

    try {
        final List<String> collected = new ArrayList<String>();

        final ProbabilityHandler<String> recorder = new ProbabilityHandler<String>() {
            // Most recent token seen; probability callbacks are reported against it.
            private String currentToken;

            public void token(String tokenText, String posTag, String lemmaText) {
                currentToken = tokenText;
                if (aWrapper.getProbabilityThreshold() != null) {
                    // With a threshold set, only probability callbacks are recorded.
                    return;
                }
                collected.add(tokenText + " " + posTag + " " + lemmaText);
            }

            public void probability(String posTag, String lemmaText, double value) {
                collected.add(currentToken + " " + posTag + " " + lemmaText + " " + value);
            }
        };

        aWrapper.setModel(aModel);
        aWrapper.setHandler(recorder);
        aWrapper.process(aTokens);
        return collected;
    } finally {
        aWrapper.destroy();
    }
}
public void testBruteChars(final String aModel, final boolean aWithProbability) throws Exception { Assume.assumeTrue(System.getenv("TREETAGGER_HOME") != null); TreeTaggerWrapper.TRACE = false; TreeTaggerWrapper<String> tt = new TreeTaggerWrapper<String>(); if (aWithProbability) { tt.setProbabilityThreshold(0.1); } tt.setModel(aModel); int exceptionCount = 0; int[] cp = new int[1]; int lastGood = -1; int lastLog = 0; for (int c = 0x0; c <= Character.MAX_CODE_POINT; c++) { try { cp[0] = c; tt.process(new String[] {new String(cp, 0, 1)}); if (c <= 0xFFFF && lastGood != c - 1) { System.out.printf("[0x%08X] - [0x%08X] causes problems %n", lastGood + 1, c - 1); } lastGood = cp[0]; if (c > 0xFFFF) { // Faster scanning above 16bit c += 12; } if (c / 0x1000 > lastLog) { System.out.printf("Progress: %08X%n", cp[0]); lastLog = c / 0x1000; } } catch (TreeTaggerException e) { System.out.printf("[0x%08X] - %s%n", cp[0], e.getMessage()); tt.destroy(); if (aWithProbability) { tt.setProbabilityThreshold(0.1); } tt.setModel(aModel); exceptionCount++; } } assertEquals(0, exceptionCount); }