/**
 * Builds the ensemble mention-entity similarity for the current document.
 *
 * <p>For every mention of the input a {@code MentionTracer} is registered with the tracer, and for
 * every candidate entity of that mention an {@code EntityTracer} is registered. All candidate
 * entities are collected into a single {@code Entities} instance, which is flagged through
 * {@code setIncludesNmeEntities(true)} when {@code includeNullAsEntityCandidate} is set.
 *
 * @return the similarity built from the mentions and the collected entities, or {@code null} when
 *     its construction throws; the failure is reported through the logger
 */
private EnsembleMentionEntitySimilarity prepapreMES() {
  Entities entities = new Entities();

  for (Mention mention : input.getMentions().getMentions()) {
    MentionTracer mt = new MentionTracer(mention);
    tracer.addMentionForDocId(docId, mention, mt);

    for (Entity entity : mention.getCandidateEntities()) {
      EntityTracer et = new EntityTracer(entity.getName());
      tracer.addEntityForMention(mention, entity.getName(), et);
    }

    entities.addAll(mention.getCandidateEntities());
  }

  logger.info(
      "Disambiguating '"
          + docId
          + "' ("
          + input.getMentions().getMentions().size()
          + " mentions, "
          + entities.size()
          + " entities)");

  if (includeNullAsEntityCandidate) {
    entities.setIncludesNmeEntities(true);
  }

  try {
    return new EnsembleMentionEntitySimilarity(input.getMentions(), entities, ss, docId, tracer);
  } catch (Exception e) {
    // Report through the logger (with the stack trace) instead of printing to stderr, so the
    // failure ends up in the same place as every other message of this class.
    logger.error("Could not create the mention-entity similarity for '" + docId + "'", e);
    return null;
  }
}
/**
 * Scores every candidate entity of every mention and stores the result lists under the current
 * document id.
 *
 * <p>For each mention, each candidate is scored with {@code mes.calcSimilarity} against the input
 * context and wrapped in a {@code ResultEntity}. A non-empty list is sorted with
 * {@link Collections#sort(List)}; an empty list receives the single entry returned by
 * {@code ResultEntity.getNoMatchingEntity()}. The list is then stored in the per-document map of
 * {@code solutions}, which is created on first use.
 *
 * @param mes similarity used to score a mention against a candidate entity
 * @throws Exception declared by the signature; anything raised while scoring propagates
 */
protected void disambiguate(EnsembleMentionEntitySimilarity mes) throws Exception {
  for (Mention mention : input.getMentions().getMentions()) {
    List<ResultEntity> ranked = new LinkedList<ResultEntity>();

    for (Entity candidate : mention.getCandidateEntities()) {
      double score = mes.calcSimilarity(mention, input.getContext(), candidate);
      ranked.add(new ResultEntity(candidate.getName(), score));
    }

    if (ranked.isEmpty()) {
      // No candidates at all: record the explicit "no match" placeholder.
      ranked.add(ResultEntity.getNoMatchingEntity());
    } else {
      Collections.sort(ranked);
    }

    // The per-document map is created lazily, i.e. only once a mention is actually processed.
    Map<ResultMention, List<ResultEntity>> perDocument = solutions.get(docId);
    if (perDocument == null) {
      perDocument = new HashMap<ResultMention, List<ResultEntity>>();
      solutions.put(docId, perDocument);
    }

    ResultMention key =
        new ResultMention(
            docId, mention.getMention(), mention.getCharOffset(), mention.getCharLength());
    perDocument.put(key, ranked);
  }
}
/**
 * Prepares a three-sentence text and checks the number of sentence token lists as well as the
 * original text of selected tokens in each sentence.
 */
@Test
public void sentencesTokensTest() {
  String text = "This is a sentence. And another one. Just for the kicks.";
  PreparedInput prepared =
      new Preparator().prepare("temp", text, new StanfordManualPreparationSettings());

  List<List<Token>> bySentence = prepared.getTokens().getSentenceTokens();
  assertEquals(3, bySentence.size());

  List<Token> first = bySentence.get(0);
  List<Token> second = bySentence.get(1);
  List<Token> third = bySentence.get(2);

  assertEquals("This", first.get(0).getOriginal());
  assertEquals("sentence", first.get(3).getOriginal());
  assertEquals(".", first.get(4).getOriginal());
  assertEquals("another", second.get(1).getOriginal());
  assertEquals("kicks", third.get(3).getOriginal());
}
/**
 * Processes the document: fills in the candidate entities of its mentions, builds the
 * mention-entity similarity, disambiguates, and logs the elapsed wall-clock time in seconds.
 *
 * <p>Failures in candidate retrieval or in disambiguation are logged and do not propagate; the
 * method always continues to the final timing message.
 */
@Override
public void run() {
  long beginTime = System.currentTimeMillis();

  try {
    AidaManager.fillInCandidateEntities(
        docId, input.getMentions(), includeNullAsEntityCandidate, includeContextMentions);
  } catch (SQLException e) {
    // Processing continues with whatever candidates the mentions hold at this point; the
    // exception is passed to the logger so the stack trace is not lost.
    logger.error("SQLException when getting candidates: " + e.getLocalizedMessage(), e);
  }

  // May be null when the similarity could not be constructed.
  EnsembleMentionEntitySimilarity mes = prepapreMES();

  try {
    disambiguate(mes);
  } catch (Exception e) {
    // The message alone can be "null" (e.g. for a NullPointerException), so log the throwable.
    logger.error("Error: " + e.getLocalizedMessage(), e);
  }

  double runTime = (System.currentTimeMillis() - beginTime) / 1000.0;
  logger.info("Document '" + docId + "' done in " + nf.format(runTime) + "s");
}
public LocalDisambiguation( PreparedInput input, SimilaritySettings settings, boolean includeNullAsEntityCandidate, boolean includeContextMentions, String docId, Map<String, Map<ResultMention, List<ResultEntity>>> solutions, Tracer tracer) throws SQLException { nf = NumberFormat.getNumberInstance(Locale.ENGLISH); nf.setMaximumFractionDigits(2); logger.debug( "Preparing '" + docId + "' (" + input.getMentions().getMentions().size() + " mentions)"); this.ss = settings; this.docId = docId; this.solutions = solutions; this.input = input; this.includeNullAsEntityCandidate = includeNullAsEntityCandidate; this.includeContextMentions = includeContextMentions; this.tracer = tracer; logger.debug("Finished preparing '" + docId + "'"); }