Пример #1
0
  /**
   * Prepares the mention-entity similarity measure for this document.
   *
   * <p>For every mention of the input a {@code MentionTracer} is registered with the tracer, and
   * for every candidate entity of that mention an {@code EntityTracer} is registered. All
   * candidate entities are collected into one {@code Entities} set, which is then handed to the
   * {@code EnsembleMentionEntitySimilarity} constructor together with the mentions and settings.
   *
   * @return the similarity measure, or {@code null} if its construction threw an exception (the
   *     exception is logged, not propagated)
   */
  private EnsembleMentionEntitySimilarity prepapreMES() {
    Entities entities = new Entities();
    for (Mention mention : input.getMentions().getMentions()) {
      MentionTracer mt = new MentionTracer(mention);
      tracer.addMentionForDocId(docId, mention, mt);
      for (Entity entity : mention.getCandidateEntities()) {
        EntityTracer et = new EntityTracer(entity.getName());
        tracer.addEntityForMention(mention, entity.getName(), et);
      }
      entities.addAll(mention.getCandidateEntities());
    }

    logger.info(
        "Disambiguating '"
            + docId
            + "' ("
            + input.getMentions().getMentions().size()
            + " mentions, "
            + entities.size()
            + " entities)");

    if (includeNullAsEntityCandidate) {
      entities.setIncludesNmeEntities(true);
    }

    try {
      return new EnsembleMentionEntitySimilarity(input.getMentions(), entities, ss, docId, tracer);
    } catch (Exception e) {
      // Route the failure through the logger (with stack trace) instead of printing to stderr,
      // so it shows up next to the other messages for this document.
      logger.error("Could not create mention-entity similarity for '" + docId + "'", e);
      return null;
    }
  }
Пример #2
0
  /**
   * Scores the candidate entities of every input mention and records the outcome.
   *
   * <p>Each candidate is scored with {@code mes.calcSimilarity} against the input context. The
   * scored candidates are sorted with {@code Collections.sort}; a mention without candidates gets
   * the single {@code ResultEntity.getNoMatchingEntity()} entry instead. The list is stored in the
   * solutions map under this document's id, keyed by a {@code ResultMention} built from the
   * mention's text, character offset and character length.
   *
   * @param mes similarity measure used to score (mention, entity) pairs
   * @throws Exception declared by the signature; nothing in this body throws it explicitly
   */
  protected void disambiguate(EnsembleMentionEntitySimilarity mes) throws Exception {
    for (Mention current : input.getMentions().getMentions()) {
      List<ResultEntity> scored = new LinkedList<ResultEntity>();
      for (Entity candidate : current.getCandidateEntities()) {
        double score = mes.calcSimilarity(current, input.getContext(), candidate);
        scored.add(new ResultEntity(candidate.getName(), score));
      }

      if (scored.isEmpty()) {
        // No candidates at all: record the placeholder entity.
        scored.add(ResultEntity.getNoMatchingEntity());
      } else {
        Collections.sort(scored);
      }

      // Create the per-document result map on first use.
      Map<ResultMention, List<ResultEntity>> perDocument = solutions.get(docId);
      if (perDocument == null) {
        perDocument = new HashMap<ResultMention, List<ResultEntity>>();
        solutions.put(docId, perDocument);
      }

      perDocument.put(
          new ResultMention(
              docId, current.getMention(), current.getCharOffset(), current.getCharLength()),
          scored);
    }
  }
Пример #3
0
  /**
   * Prepares a three-sentence text and checks the number of sentences as well as selected tokens
   * of each sentence returned by {@code getSentenceTokens()}.
   */
  @Test
  public void sentencesTokensTest() {
    PreparedInput prepared =
        new Preparator()
            .prepare(
                "temp",
                "This is a sentence. And another one. Just for the kicks.",
                new StanfordManualPreparationSettings());
    List<List<Token>> bySentence = prepared.getTokens().getSentenceTokens();

    assertEquals(3, bySentence.size());

    // First sentence: "This is a sentence ."
    List<Token> first = bySentence.get(0);
    assertEquals("This", first.get(0).getOriginal());
    assertEquals("sentence", first.get(3).getOriginal());
    assertEquals(".", first.get(4).getOriginal());

    // Second sentence: "And another one ."
    List<Token> second = bySentence.get(1);
    assertEquals("another", second.get(1).getOriginal());

    // Third sentence: "Just for the kicks ."
    List<Token> third = bySentence.get(2);
    assertEquals("kicks", third.get(3).getOriginal());
  }
Пример #4
0
  /**
   * Runs the disambiguation for this document.
   *
   * <p>Fills in the candidate entities for the input mentions, prepares the mention-entity
   * similarity measure, calls {@link #disambiguate}, and finally logs the elapsed wall-clock time
   * in seconds. Exceptions from the candidate lookup and from the disambiguation are logged and
   * not propagated.
   */
  @Override
  public void run() {
    long beginTime = System.currentTimeMillis();
    try {
      AidaManager.fillInCandidateEntities(
          docId, input.getMentions(), includeNullAsEntityCandidate, includeContextMentions);
    } catch (SQLException e) {
      // Processing continues after a failed candidate lookup; pass the exception itself so the
      // stack trace is not lost.
      logger.error("SQLException when getting candidates: " + e.getLocalizedMessage(), e);
    }

    // May be null if the similarity measure could not be constructed.
    EnsembleMentionEntitySimilarity mes = prepapreMES();

    try {
      disambiguate(mes);
    } catch (Exception e) {
      // Exceptions without a message (e.g. NullPointerException) would otherwise only show up as
      // "Error: null"; attaching the throwable keeps the type and stack trace.
      logger.error("Error: " + e.getLocalizedMessage(), e);
    }
    double runTime = (System.currentTimeMillis() - beginTime) / 1000.0;
    logger.info("Document '" + docId + "' done in " + nf.format(runTime) + "s");
  }
Пример #5
0
  public LocalDisambiguation(
      PreparedInput input,
      SimilaritySettings settings,
      boolean includeNullAsEntityCandidate,
      boolean includeContextMentions,
      String docId,
      Map<String, Map<ResultMention, List<ResultEntity>>> solutions,
      Tracer tracer)
      throws SQLException {
    nf = NumberFormat.getNumberInstance(Locale.ENGLISH);
    nf.setMaximumFractionDigits(2);
    logger.debug(
        "Preparing '" + docId + "' (" + input.getMentions().getMentions().size() + " mentions)");

    this.ss = settings;
    this.docId = docId;
    this.solutions = solutions;
    this.input = input;
    this.includeNullAsEntityCandidate = includeNullAsEntityCandidate;
    this.includeContextMentions = includeContextMentions;
    this.tracer = tracer;

    logger.debug("Finished preparing '" + docId + "'");
  }