/**
 * One-time fixture setup: assigns the shared {@code openNLP} and
 * {@code searcher} fixtures and registers the test entities (a person,
 * SKOS concepts including a redirect target, places and organisations)
 * with the searcher.
 *
 * @throws IOException declared for the fixture construction calls
 */
@BeforeClass
public static void setUpServices() throws IOException {
    openNLP = new OpenNLP(new ClasspathDataFileProvider("DUMMY_SYMBOLIC_NAME"));
    searcher = new TestSearcherImpl(NAME, SimpleTokenizer.INSTANCE);

    // type URIs shared by several of the entities registered below
    final String personType = OntologicalClasses.DBPEDIA_PERSON.getUnicodeString();
    final String placeType = OntologicalClasses.DBPEDIA_PLACE.getUnicodeString();
    final String organisationType = OntologicalClasses.DBPEDIA_ORGANISATION.getUnicodeString();
    final String conceptType = NamespaceEnum.skos + "Concept";

    // populate the searcher; entities are added in a fixed order
    final Representation patrickMarshall =
            factory.createRepresentation("urn:test:PatrickMarshall");
    patrickMarshall.addNaturalText(NAME, "Patrick Marshall");
    patrickMarshall.addReference(TYPE, personType);
    searcher.addEntity(patrickMarshall);

    // concept that carries a redirect to the entity registered right after it
    final Representation geologist = factory.createRepresentation("urn:test:Geologist");
    geologist.addNaturalText(NAME, "Geologist");
    geologist.addReference(TYPE, conceptType);
    geologist.addReference(REDIRECT, "urn:test:redirect:Geologist");
    searcher.addEntity(geologist);

    // the redirect target
    final Representation geologistRedirect =
            factory.createRepresentation("urn:test:redirect:Geologist");
    geologistRedirect.addNaturalText(NAME, "Geologe (redirect)");
    geologistRedirect.addReference(TYPE, conceptType);
    searcher.addEntity(geologistRedirect);

    final Representation newZealand = factory.createRepresentation("urn:test:NewZealand");
    newZealand.addNaturalText(NAME, "New Zealand");
    newZealand.addReference(TYPE, placeType);
    searcher.addEntity(newZealand);

    final Representation universityOfOtago =
            factory.createRepresentation("urn:test:UniversityOfOtago");
    universityOfOtago.addNaturalText(NAME, "University of Otago");
    universityOfOtago.addReference(TYPE, organisationType);
    searcher.addEntity(universityOfOtago);

    final Representation university = factory.createRepresentation("urn:test:University");
    university.addNaturalText(NAME, "University");
    university.addReference(TYPE, conceptType);
    searcher.addEntity(university);

    final Representation otago = factory.createRepresentation("urn:test:Otago");
    otago.addNaturalText(NAME, "Otago");
    otago.addReference(TYPE, placeType);
    searcher.addEntity(otago);

    // a second "Otago": both a place and a university located in Texas
    final Representation otagoTexas = factory.createRepresentation("urn:test:Otago_Texas");
    otagoTexas.addNaturalText(NAME, "Otago (Texas)");
    otagoTexas.addNaturalText(NAME, "Otago");
    otagoTexas.addReference(TYPE, placeType);
    searcher.addEntity(otagoTexas);

    final Representation universityOfOtagoTexas =
            factory.createRepresentation("urn:test:UniversityOfOtago_Texas");
    universityOfOtagoTexas.addNaturalText(NAME, "University of Otago (Texas)");
    universityOfOtagoTexas.addReference(TYPE, organisationType);
    searcher.addEntity(universityOfOtagoTexas);
}
/** * @param literalFactory the LiteralFactory to use * @param graph the MGraph to use * @param contentItemId the contentItemId the enhancement is extracted from * @param relatedEnhancements enhancements this textAnnotation is related to * @param suggestion the entity suggestion * @param nameField the field used to extract the name * @param lang the preferred language to include or <code>null</code> if none */ public static UriRef writeEntityAnnotation( EnhancementEngine engine, LiteralFactory literalFactory, MGraph graph, UriRef contentItemId, Collection<NonLiteral> relatedEnhancements, Suggestion suggestion, String nameField, String lang) { Representation rep = suggestion.getEntity().getRepresentation(); // 1. extract the "best label" // Start with the matched one Text label = suggestion.getMatchedLabel(); // if the matched label is not in the requested language boolean langMatch = (lang == null && label.getLanguage() == null) || (label.getLanguage() != null && label.getLanguage().startsWith(lang)); // search if a better label is available for this Entity if (!langMatch) { Iterator<Text> labels = rep.getText(nameField); while (labels.hasNext() && !langMatch) { Text actLabel = labels.next(); langMatch = (lang == null && actLabel.getLanguage() == null) || (actLabel.getLanguage() != null && actLabel.getLanguage().startsWith(lang)); if (langMatch) { // if the language matches -> // override the matched label label = actLabel; } } } // else the matched label will be the best to use Literal literal; if (label.getLanguage() == null) { literal = new PlainLiteralImpl(label.getText()); } else { literal = new PlainLiteralImpl(label.getText(), new Language(label.getLanguage())); } // Now create the entityAnnotation UriRef entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(graph, engine, contentItemId); // first relate this entity annotation to the text annotation(s) for (NonLiteral enhancement : relatedEnhancements) { graph.add(new 
TripleImpl(entityAnnotation, DC_RELATION, enhancement)); } UriRef entityUri = new UriRef(rep.getId()); // add the link to the referred entity graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, entityUri)); // add the label parsed above graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, literal)); if (suggestion.getScore() != null) { graph.add( new TripleImpl( entityAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(suggestion.getScore()))); } Iterator<Reference> types = rep.getReferences(RDF_TYPE.getUnicodeString()); while (types.hasNext()) { graph.add( new TripleImpl( entityAnnotation, ENHANCER_ENTITY_TYPE, new UriRef(types.next().getReference()))); } // add the name of the ReferencedSite that manages the Entity if (suggestion.getEntity().getSite() != null) { graph.add( new TripleImpl( entityAnnotation, new UriRef(RdfResourceEnum.site.getUri()), new PlainLiteralImpl(suggestion.getEntity().getSite()))); } return entityAnnotation; }