예제 #1
0
  /**
   * Processes the given tokens with the supplied wrapper and collects what the handler receives as
   * plain text lines.
   *
   * <p>The run is skipped through a JUnit assumption when the {@code TREETAGGER_HOME} environment
   * variable is not set. The static {@code TreeTaggerWrapper.TRACE} flag is set to {@code true}
   * before processing, and the wrapper is destroyed afterwards whether or not processing succeeds.
   *
   * @param aWrapper the wrapper to configure, use and finally destroy
   * @param aModel the model name handed to {@code setModel}
   * @param aTokens the tokens handed to {@code process}
   * @return the collected lines: {@code "token pos lemma"} entries when the wrapper reports no
   *     probability threshold, and {@code "token pos lemma probability"} entries for every
   *     probability callback
   * @throws IOException declared for the wrapper calls made here
   * @throws TreeTaggerException declared for the wrapper calls made here
   */
  private List<String> run(
      final TreeTaggerWrapper<String> aWrapper, final String aModel, final String... aTokens)
      throws IOException, TreeTaggerException {
    final boolean homeConfigured = System.getenv("TREETAGGER_HOME") != null;
    Assume.assumeTrue(homeConfigured);

    TreeTaggerWrapper.TRACE = true;

    try {
      final List<String> lines = new ArrayList<String>();

      final ProbabilityHandler<String> collector =
          new ProbabilityHandler<String>() {
            // Most recent token seen; probability callbacks are reported against it.
            private String currentToken;

            public void token(String aToken, String aPos, String aLemma) {
              currentToken = aToken;
              if (aWrapper.getProbabilityThreshold() != null) {
                // With a threshold set, lines are produced by probability(...) instead.
                return;
              }
              final String line = aToken + " " + aPos + " " + aLemma;
              lines.add(line);
            }

            public void probability(String pos, String lemma, double probability) {
              final String line = currentToken + " " + pos + " " + lemma + " " + probability;
              lines.add(line);
            }
          };

      aWrapper.setModel(aModel);
      aWrapper.setHandler(collector);
      aWrapper.process(aTokens);

      return lines;
    } finally {
      aWrapper.destroy();
    }
  }
예제 #2
0
  /**
   * Feeds single-code-point strings to a {@code TreeTaggerWrapper} one at a time and fails if any
   * of them causes a {@code TreeTaggerException}.
   *
   * <p>Every code point from 0 up to {@code 0xFFFF} is tried; above that the scan advances in
   * steps of 13 to keep the run time down. The test is skipped through a JUnit assumption when the
   * {@code TREETAGGER_HOME} environment variable is not set. Progress and problem ranges are
   * printed to standard output.
   *
   * <p>The wrapper is always destroyed when the method exits, on success, on assertion failure and
   * on unexpected exceptions alike, so it is not left behind by a finished or aborted run.
   *
   * @param aModel the model name handed to {@code setModel}
   * @param aWithProbability whether to set a probability threshold of {@code 0.1} on the wrapper
   * @throws Exception if the wrapper fails with anything other than a {@code TreeTaggerException}
   */
  public void testBruteChars(final String aModel, final boolean aWithProbability) throws Exception {
    Assume.assumeTrue(System.getenv("TREETAGGER_HOME") != null);

    TreeTaggerWrapper.TRACE = false;

    int exceptionCount = 0;
    TreeTaggerWrapper<String> tt = new TreeTaggerWrapper<String>();
    try {
      if (aWithProbability) {
        tt.setProbabilityThreshold(0.1);
      }
      tt.setModel(aModel);

      int[] cp = new int[1];
      int lastGood = -1;
      int lastLog = 0;
      for (int c = 0x0; c <= Character.MAX_CODE_POINT; c++) {
        try {
          cp[0] = c;
          tt.process(new String[] {new String(cp, 0, 1)});

          // A gap between the previous good code point and this one marks a failing range;
          // only reported within the 16-bit range, where every code point is visited.
          if (c <= 0xFFFF && lastGood != c - 1) {
            System.out.printf("[0x%08X] - [0x%08X] causes problems %n", lastGood + 1, c - 1);
          }
          lastGood = cp[0];
          if (c > 0xFFFF) {
            // Faster scanning above 16bit
            c += 12;
          }
          // Print one progress line each time the scan enters a new 0x1000-sized bucket.
          if (c / 0x1000 > lastLog) {
            System.out.printf("Progress: %08X%n", cp[0]);
            lastLog = c / 0x1000;
          }
        } catch (TreeTaggerException e) {
          System.out.printf("[0x%08X] - %s%n", cp[0], e.getMessage());
          // Tear the wrapper down and configure it again before trying the next code point.
          tt.destroy();
          if (aWithProbability) {
            tt.setProbabilityThreshold(0.1);
          }
          tt.setModel(aModel);
          exceptionCount++;
        }
      }
    } finally {
      // Release the wrapper on every exit path, not only after a TreeTaggerException.
      tt.destroy();
    }
    assertEquals(0, exceptionCount);
  }