Пример #1
0
  /**
   * Normalizes the text of every entity by running it through the sentence tokenizer and
   * re-joining the resulting tokens with single spaces.
   *
   * <p>A period is appended to each text before it is handed to
   * {@code FoxTextUtil.getSentenceToken} (presumably so the tokenizer sees a terminated
   * sentence; confirm against the tokenizer). Tokens that are blank after trimming are dropped.
   * The entities are modified in place via {@code setText}.
   *
   * @param list the entities to clean; each entity's text is overwritten
   * @return a new {@code ArrayList} containing the same entity instances in the same order
   */
  protected List<Entity> clean(List<Entity> list) {
    LOG.info("clean entities ...");

    for (Entity current : list) {
      String[] parts = FoxTextUtil.getSentenceToken(current.getText() + ".");
      StringBuilder joined = new StringBuilder();
      for (int i = 0; i < parts.length; i++) {
        String part = parts[i];
        // Skip tokens that consist of whitespace only.
        if (part.trim().isEmpty()) {
          continue;
        }
        joined.append(part).append(" ");
      }
      // The final trim removes the separator appended after the last token.
      current.setText(joined.toString().trim());
    }

    // Hand back a fresh list so callers do not share the list instance that was passed in.
    List<Entity> copy = new ArrayList<Entity>(list);

    LOG.info("clean entities done.");
    return copy;
  }
Пример #2
0
  /**
   * Logs a summary of the entities currently held in {@code entityList}.
   *
   * <p>At DEBUG level it writes the number of entities together with the tool of the first
   * entity, followed by one line per entity (text, type and tool).
   *
   * <p>At INFO level it writes two statistics blocks, each preceded by the tool name:
   * <ol>
   *   <li>the number of distinct entity texts per type (LOC, ORG, PER), followed by the total
   *       number of entities (duplicates included);</li>
   *   <li>the number of space-separated tokens per type, followed by their sum.</li>
   * </ol>
   */
  private void logMsg() {
    // DEBUG
    if (!entityList.isEmpty()) {
      LOG.debug(entityList.size() + "(" + entityList.iterator().next().getTool() + ")");
    }
    for (Entity entity : entityList) {
      LOG.debug(entity.getText() + "=>" + entity.getType() + "(" + entity.getTool() + ")");
    }

    // INFO: every distinct text is counted once, under the type of its first occurrence.
    int locCount = 0;
    int orgCount = 0;
    int perCount = 0;
    // A hash set keeps the duplicate check constant-time instead of scanning a list per entity.
    // Fully qualified so the method does not depend on additional imports.
    java.util.Set<String> seenTexts = new java.util.HashSet<>();
    for (Entity e : entityList) {
      // add() returns false when the text has already been counted.
      if (seenTexts.add(e.getText())) {
        if (e.getType().equals(EntityClassMap.L)) {
          locCount++;
        }
        if (e.getType().equals(EntityClassMap.O)) {
          orgCount++;
        }
        if (e.getType().equals(EntityClassMap.P)) {
          perCount++;
        }
      }
    }
    LOG.info(this.getToolName() + ":");
    LOG.info(locCount + " LOCs found");
    LOG.info(orgCount + " ORGs found");
    LOG.info(perCount + " PERs found");
    // The total is the raw entity count, so it may exceed the sum of the distinct counts above.
    LOG.info(entityList.size() + " total found");

    // Token statistics: every entity contributes the number of space-separated parts of its text.
    int locTokens = 0;
    int orgTokens = 0;
    int perTokens = 0;
    for (Entity e : entityList) {
      int tokenCount = e.getText().split(" ").length;
      if (e.getType().equals(EntityClassMap.L)) {
        locTokens += tokenCount;
      }
      if (e.getType().equals(EntityClassMap.O)) {
        orgTokens += tokenCount;
      }
      if (e.getType().equals(EntityClassMap.P)) {
        perTokens += tokenCount;
      }
    }
    LOG.info(this.getToolName() + "(token):");
    LOG.info(locTokens + " LOCs found");
    LOG.info(orgTokens + " ORGs found");
    LOG.info(perTokens + " PERs found");
    // Parenthesized so the numeric sum is clearly computed before string concatenation.
    LOG.info((locTokens + orgTokens + perTokens) + " total found");
  }