Beispiel #1
0
  /**
   * Registers tracers for every mention and candidate entity of the input, gathers all candidate
   * entities into one {@code Entities} collection and builds the mention-entity similarity on top
   * of it.
   *
   * @return the similarity built for the current document, or {@code null} if its construction
   *     threw an exception (the failure is logged)
   */
  private EnsembleMentionEntitySimilarity prepapreMES() {
    Entities entities = new Entities();
    for (Mention mention : input.getMentions().getMentions()) {
      MentionTracer mt = new MentionTracer(mention);
      tracer.addMentionForDocId(docId, mention, mt);
      for (Entity entity : mention.getCandidateEntities()) {
        EntityTracer et = new EntityTracer(entity.getName());
        tracer.addEntityForMention(mention, entity.getName(), et);
      }
      entities.addAll(mention.getCandidateEntities());
    }

    logger.info(
        "Disambiguating '"
            + docId
            + "' ("
            + input.getMentions().getMentions().size()
            + " mentions, "
            + entities.size()
            + " entities)");

    if (includeNullAsEntityCandidate) {
      entities.setIncludesNmeEntities(true);
    }

    try {
      return new EnsembleMentionEntitySimilarity(input.getMentions(), entities, ss, docId, tracer);
    } catch (Exception e) {
      // Report through the logger (with the cause) instead of printing to stderr, so the failure
      // shows up next to the "Disambiguating" message for the same document.
      logger.error("Could not create EnsembleMentionEntitySimilarity for '" + docId + "'", e);
      return null;
    }
  }
Beispiel #2
0
  /**
   * Scores every candidate entity of every input mention with the given similarity measure and
   * stores the per-mention result list in {@code solutions} under the current {@code docId}.
   *
   * <p>A mention without candidates gets a single {@code ResultEntity.getNoMatchingEntity()}
   * entry; otherwise the scored candidates are sorted with {@code Collections.sort}.
   *
   * @param mes similarity measure used to score a mention against a candidate entity
   * @throws Exception propagated from the similarity computation
   */
  protected void disambiguate(EnsembleMentionEntitySimilarity mes) throws Exception {
    for (Mention currentMention : input.getMentions().getMentions()) {
      List<ResultEntity> scoredCandidates = new LinkedList<ResultEntity>();

      for (Entity candidate : currentMention.getCandidateEntities()) {
        double score = mes.calcSimilarity(currentMention, input.getContext(), candidate);
        ResultEntity scored = new ResultEntity(candidate.getName(), score);
        scoredCandidates.add(scored);
      }

      if (scoredCandidates.isEmpty()) {
        // Nothing to rank: record the explicit "no match" placeholder.
        scoredCandidates.add(ResultEntity.getNoMatchingEntity());
      } else {
        Collections.sort(scoredCandidates);
      }

      // Create the per-document result map lazily, on the first mention that reaches this point.
      Map<ResultMention, List<ResultEntity>> perDocument = solutions.get(docId);
      if (perDocument == null) {
        perDocument = new HashMap<ResultMention, List<ResultEntity>>();
        solutions.put(docId, perDocument);
      }

      ResultMention key =
          new ResultMention(
              docId,
              currentMention.getMention(),
              currentMention.getCharOffset(),
              currentMention.getCharLength());
      perDocument.put(key, scoredCandidates);
    }
  }
  /**
   * Computes the keyphrase-based similarity of two entities.
   *
   * <p>The score is the sum of the products of both entities' combined MI/IDF weights over the
   * keyphrases they share (only where both weights are positive), divided by the product of the
   * two {@code calcNorm} values. If that product is zero the score is {@code 0.0}.
   *
   * <p>When entity-entity tracing is enabled (the tracer is not a {@code
   * NullEntityEntityTracing}), the positive keyphrase weights of both entities and the matched
   * keyphrases are also reported to the tracer.
   *
   * @param a first entity
   * @param b second entity
   * @param entitiesContext context to read keyphrases and weights from; it is cast to {@code
   *     WeightedKeyphrasesContext} and stored in the {@code kpc} field
   * @return the similarity score, {@code 0.0} when the norm product is zero
   */
  @Override
  public double calcSimilarity(Entity a, Entity b, EntitiesContext entitiesContext) {
    kpc = (WeightedKeyphrasesContext) entitiesContext;

    Map<String, Double> matches = new HashMap<String, Double>();
    double dotprod = 0.0;

    int[] e1kps = kpc.getEntityKeyphraseIds(a);
    int[] e2kps = kpc.getEntityKeyphraseIds(b);

    TIntHashSet sharedKeyphrases = new TIntHashSet(e1kps);
    sharedKeyphrases.retainAll(new TIntHashSet(e2kps));

    // Only keyphrases present for both entities can contribute to the dot product.
    for (int kp : sharedKeyphrases.toArray()) {
      double v1 = kpc.getCombinedKeyphraseMiIdfWeight(a, kp);
      double v2 = kpc.getCombinedKeyphraseMiIdfWeight(b, kp);

      if (v1 > 0 && v2 > 0) {
        double product = v1 * v2;
        dotprod += product;
        matches.put(kpc.getKeyphraseForId(kp), product);
      }
    }

    double norm1 = calcNorm(a, e1kps);
    double norm2 = calcNorm(b, e2kps);

    double sim = 0.0;
    double denom = norm1 * norm2;

    // Guard against division by zero when either norm is zero.
    if (denom != 0) {
      sim = dotprod / denom;
    }

    if (!(tracer.eeTracing() instanceof NullEntityEntityTracing)) {
      Map<String, Double> e1keyphrases = sortedPositiveKeyphraseWeights(a, e1kps);
      Map<String, Double> e2keyphrases = sortedPositiveKeyphraseWeights(b, e2kps);

      Map<String, TermTracer> matchedKeywords = new HashMap<String, TermTracer>();
      for (Entry<String, Double> match : matches.entrySet()) {
        TermTracer tt = new TermTracer();
        tt.setTermWeight(match.getValue());
        matchedKeywords.put(match.getKey(), tt);
      }

      tracer.eeTracing().addEntityContext(a.getName(), e1keyphrases);
      tracer.eeTracing().addEntityContext(b.getName(), e2keyphrases);

      // The measure traced for (a, b) carries b's keyphrases and the one for (b, a) carries a's,
      // exactly as the original code did.
      KeytermEntityEntityMeasureTracer mt =
          new KeytermEntityEntityMeasureTracer(
              "KeyphraseCosineSim", 0.0, e2keyphrases, matchedKeywords);
      mt.setScore(sim);
      tracer.eeTracing().addEntityEntityMeasureTracer(a.getName(), b.getName(), mt);

      KeytermEntityEntityMeasureTracer mt2 =
          new KeytermEntityEntityMeasureTracer(
              "KeyphraseCosineSim", 0.0, e1keyphrases, matchedKeywords);
      mt2.setScore(sim);
      tracer.eeTracing().addEntityEntityMeasureTracer(b.getName(), a.getName(), mt2);
    }

    return sim;
  }

  /**
   * Collects the strictly positive combined MI/IDF keyphrase weights of an entity, keyed by
   * keyphrase text, in the iteration order produced by {@code
   * CollectionUtils.sortMapByValue(weights, true)}.
   *
   * @param entity entity whose keyphrase weights are read from {@code kpc}
   * @param keyphraseIds ids of the entity's keyphrases
   * @return a new {@code LinkedHashMap} copy of the sorted weights
   */
  private Map<String, Double> sortedPositiveKeyphraseWeights(Entity entity, int[] keyphraseIds) {
    Map<String, Double> weights = new HashMap<String, Double>();
    for (int kp : keyphraseIds) {
      // Fetch the weight once and reuse it for both the filter and the stored value.
      double weight = kpc.getCombinedKeyphraseMiIdfWeight(entity, kp);
      if (weight > 0.0) {
        weights.put(kpc.getKeyphraseForId(kp), weight);
      }
    }
    Map<String, Double> sorted = CollectionUtils.sortMapByValue(weights, true);
    // Copy so the traced map keeps the sorted iteration order independently of the helper's
    // return type.
    return new LinkedHashMap<String, Double>(sorted);
  }