public void testBruteChars(final String aModel, final boolean aWithProbability) throws Exception { Assume.assumeTrue(System.getenv("TREETAGGER_HOME") != null); TreeTaggerWrapper.TRACE = false; TreeTaggerWrapper<String> tt = new TreeTaggerWrapper<String>(); if (aWithProbability) { tt.setProbabilityThreshold(0.1); } tt.setModel(aModel); int exceptionCount = 0; int[] cp = new int[1]; int lastGood = -1; int lastLog = 0; for (int c = 0x0; c <= Character.MAX_CODE_POINT; c++) { try { cp[0] = c; tt.process(new String[] {new String(cp, 0, 1)}); if (c <= 0xFFFF && lastGood != c - 1) { System.out.printf("[0x%08X] - [0x%08X] causes problems %n", lastGood + 1, c - 1); } lastGood = cp[0]; if (c > 0xFFFF) { // Faster scanning above 16bit c += 12; } if (c / 0x1000 > lastLog) { System.out.printf("Progress: %08X%n", cp[0]); lastLog = c / 0x1000; } } catch (TreeTaggerException e) { System.out.printf("[0x%08X] - %s%n", cp[0], e.getMessage()); tt.destroy(); if (aWithProbability) { tt.setProbabilityThreshold(0.1); } tt.setModel(aModel); exceptionCount++; } } assertEquals(0, exceptionCount); }
/**
 * Runs the English model on the single token "lead" with a probability threshold of 0.1 and
 * checks that exactly the three expected result lines (NN, VV, JJ) are returned in order.
 *
 * @throws Exception if the tagger run fails
 */
@Test
public void testEnglishWithProbabilities4() throws Exception {
    final List<String> expected = asList(
            "lead NN lead 0.647454",
            "lead VV lead 0.196787",
            "lead JJ lead 0.142647");

    final TreeTaggerWrapper<String> tagger = new TreeTaggerWrapper<String>();
    tagger.setProbabilityThreshold(0.1);

    final List<String> actual = run(tagger, "english-par-linux-3.2.bin:iso8859-1", "lead");

    assertEquals(expected, actual);
}
/**
 * Runs the English model on the tokens "This is a test ." with a probability threshold of 0.1
 * and checks that one result line per token is returned, matching the expected list exactly.
 *
 * @throws Exception if the tagger run fails
 */
@Test
public void testEnglishWithProbabilities() throws Exception {
    final List<String> expected = asList(
            "This DT this 1.0",
            "is VBZ be 1.0",
            "a DT a 1.0",
            "test NN test 0.999661",
            ". SENT . 1.0");

    final TreeTaggerWrapper<String> tagger = new TreeTaggerWrapper<String>();
    tagger.setProbabilityThreshold(0.1);

    final List<String> actual = run(
            tagger,
            "english-par-linux-3.2.bin:iso8859-1",
            "This", "is", "a", "test", ".");

    assertEquals(expected, actual);
}
/**
 * Runs the English model on the sentence "He could lead if he would get the lead out ." with a
 * probability threshold of 0.1 and compares the result lines with the expected list.
 *
 * <p>The expected list holds two lines each for the second "lead" and for "out", so it is
 * longer than the number of input tokens.
 *
 * @throws Exception if the tagger run fails
 */
@Test
public void testEnglishWithProbabilities2() throws Exception {
    final List<String> expected = asList(
            "He PP he 1.0",
            "could MD could 1.0",
            "lead VV lead 0.999748",
            "if IN if 1.0",
            "he PP he 1.0",
            "would MD would 1.0",
            "get VV get 1.0",
            "the DT the 0.999993",
            "lead NN lead 0.753085",
            "lead VV lead 0.103856",
            "out RP out 0.726204",
            "out IN out 0.226546",
            ". SENT . 1.0");

    final TreeTaggerWrapper<String> tagger = new TreeTaggerWrapper<String>();
    tagger.setProbabilityThreshold(0.1);

    final List<String> actual = run(
            tagger,
            "english-par-linux-3.2.bin:iso8859-1",
            "He", "could", "lead", "if", "he", "would", "get", "the", "lead", "out", ".");

    assertEquals(expected, actual);
}