/** * Cleans the entities, uses a tokenizer to tokenize all entities with the same algorithm. * * @param list * @return */ protected List<Entity> clean(List<Entity> list) { LOG.info("clean entities ..."); // clean token with the tokenizer for (Entity entity : list) { StringBuilder cleanText = new StringBuilder(); String[] tokens = FoxTextUtil.getSentenceToken(entity.getText() + "."); // String[] tokens = FoxTextUtil.getToken(entity.getText()); for (String token : tokens) { if (!token.trim().isEmpty()) { cleanText.append(token); cleanText.append(" "); } } entity.setText(cleanText.toString().trim()); } list = new ArrayList<Entity>(list); LOG.info("clean entities done."); return list; }
private void logMsg() { // DEBUG if (entityList.size() > 0) LOG.debug(entityList.size() + "(" + entityList.iterator().next().getTool() + ")"); for (Entity entity : entityList) LOG.debug(entity.getText() + "=>" + entity.getType() + "(" + entity.getTool() + ")"); // INFO int l = 0, o = 0, p = 0; List<String> list = new ArrayList<>(); for (Entity e : entityList) { if (!list.contains(e.getText())) { if (e.getType().equals(EntityClassMap.L)) l++; if (e.getType().equals(EntityClassMap.O)) o++; if (e.getType().equals(EntityClassMap.P)) p++; list.add(e.getText()); } } LOG.info(this.getToolName() + ":"); LOG.info(l + " LOCs found"); LOG.info(o + " ORGs found"); LOG.info(p + " PERs found"); LOG.info(entityList.size() + " total found"); l = 0; o = 0; p = 0; for (Entity e : entityList) { if (e.getType().equals(EntityClassMap.L)) l += e.getText().split(" ").length; if (e.getType().equals(EntityClassMap.O)) o += e.getText().split(" ").length; if (e.getType().equals(EntityClassMap.P)) p += e.getText().split(" ").length; } LOG.info(this.getToolName() + "(token):"); LOG.info(l + " LOCs found"); LOG.info(o + " ORGs found"); LOG.info(p + " PERs found"); LOG.info(l + o + p + " total found"); }