/**
 * One-time fixture setup: creates the {@code OpenNLP} instance and the test searcher, then
 * registers the entities the tests look up.
 *
 * <p>Registered entities: a person, a concept carrying a redirect reference plus the redirect
 * target itself, several places and organisations, and a second set of "Otago" entries so that
 * the label "Otago" is shared by more than one entity.
 *
 * @throws IOException declared by the method signature
 */
@BeforeClass
 public static void setUpServices() throws IOException {
   openNLP = new OpenNLP(new ClasspathDataFileProvider("DUMMY_SYMBOLIC_NAME"));
   searcher = new TestSearcherImpl(NAME, SimpleTokenizer.INSTANCE);

   // Type URIs shared by several of the entities registered below.
   final String personType = OntologicalClasses.DBPEDIA_PERSON.getUnicodeString();
   final String placeType = OntologicalClasses.DBPEDIA_PLACE.getUnicodeString();
   final String organisationType = OntologicalClasses.DBPEDIA_ORGANISATION.getUnicodeString();
   final String conceptType = NamespaceEnum.skos + "Concept";

   // --- a person ---
   Representation patrickMarshall = factory.createRepresentation("urn:test:PatrickMarshall");
   patrickMarshall.addNaturalText(NAME, "Patrick Marshall");
   patrickMarshall.addReference(TYPE, personType);
   searcher.addEntity(patrickMarshall);

   // --- a concept that references a redirect ---
   Representation geologist = factory.createRepresentation("urn:test:Geologist");
   geologist.addNaturalText(NAME, "Geologist");
   geologist.addReference(TYPE, conceptType);
   geologist.addReference(REDIRECT, "urn:test:redirect:Geologist");
   searcher.addEntity(geologist);

   // --- the entity the redirect above refers to ---
   Representation geologistRedirect =
       factory.createRepresentation("urn:test:redirect:Geologist");
   geologistRedirect.addNaturalText(NAME, "Geologe (redirect)");
   geologistRedirect.addReference(TYPE, conceptType);
   searcher.addEntity(geologistRedirect);

   // --- a place ---
   Representation newZealand = factory.createRepresentation("urn:test:NewZealand");
   newZealand.addNaturalText(NAME, "New Zealand");
   newZealand.addReference(TYPE, placeType);
   searcher.addEntity(newZealand);

   // --- an organisation whose label contains the labels of two other entities ---
   Representation universityOfOtago =
       factory.createRepresentation("urn:test:UniversityOfOtago");
   universityOfOtago.addNaturalText(NAME, "University of Otago");
   universityOfOtago.addReference(TYPE, organisationType);
   searcher.addEntity(universityOfOtago);

   Representation university = factory.createRepresentation("urn:test:University");
   university.addNaturalText(NAME, "University");
   university.addReference(TYPE, conceptType);
   searcher.addEntity(university);

   Representation otago = factory.createRepresentation("urn:test:Otago");
   otago.addNaturalText(NAME, "Otago");
   otago.addReference(TYPE, placeType);
   searcher.addEntity(otago);

   // --- a second Otago (place and university); the place also carries the plain label "Otago" ---
   Representation otagoTexas = factory.createRepresentation("urn:test:Otago_Texas");
   otagoTexas.addNaturalText(NAME, "Otago (Texas)");
   otagoTexas.addNaturalText(NAME, "Otago");
   otagoTexas.addReference(TYPE, placeType);
   searcher.addEntity(otagoTexas);

   Representation universityOfOtagoTexas =
       factory.createRepresentation("urn:test:UniversityOfOtago_Texas");
   universityOfOtagoTexas.addNaturalText(NAME, "University of Otago (Texas)");
   universityOfOtagoTexas.addReference(TYPE, organisationType);
   searcher.addEntity(universityOfOtagoTexas);
 }
示例#2
0
  /**
   * Creates an entity annotation for the given suggestion and writes it to the graph.
   *
   * <p>The annotation is linked (via {@code DC_RELATION}) to every related enhancement and carries
   * the entity reference, the best available label, the confidence (only if the suggestion has a
   * score), all {@code RDF_TYPE} references of the entity, and the site name (only if the entity
   * reports one).
   *
   * <p>Label selection: the matched label of the suggestion is used if it fits the requested
   * language. Otherwise the first value of {@code nameField} that fits is used; if none fits, the
   * matched label is kept. A label fits when {@code lang} is <code>null</code> and the label has
   * no language, or when {@code lang} is not <code>null</code> and the label's language starts
   * with it.
   *
   * @param engine the engine handed to {@code EnhancementEngineHelper.createEntityEnhancement}
   * @param literalFactory the LiteralFactory to use
   * @param graph the MGraph to use
   * @param contentItemId the contentItemId the enhancement is extracted from
   * @param relatedEnhancements enhancements this entity annotation is related to
   * @param suggestion the entity suggestion
   * @param nameField the field used to extract the name
   * @param lang the preferred language to include or <code>null</code> if none
   * @return the UriRef of the created entity annotation
   */
  public static UriRef writeEntityAnnotation(
      EnhancementEngine engine,
      LiteralFactory literalFactory,
      MGraph graph,
      UriRef contentItemId,
      Collection<NonLiteral> relatedEnhancements,
      Suggestion suggestion,
      String nameField,
      String lang) {
    Representation rep = suggestion.getEntity().getRepresentation();

    // 1. extract the "best label", starting with the matched one
    Text label = suggestion.getMatchedLabel();
    if (!labelMatchesLanguage(label, lang)) {
      // the matched label is not in the requested language:
      // search if a better label is available for this Entity
      Iterator<Text> labels = rep.getText(nameField);
      while (labels.hasNext()) {
        Text candidate = labels.next();
        if (labelMatchesLanguage(candidate, lang)) {
          // first label in the requested language overrides the matched label
          label = candidate;
          break;
        }
      }
    } // else the matched label will be the best to use

    String labelLanguage = label.getLanguage();
    Literal literal;
    if (labelLanguage == null) {
      literal = new PlainLiteralImpl(label.getText());
    } else {
      literal = new PlainLiteralImpl(label.getText(), new Language(labelLanguage));
    }

    // 2. create the entityAnnotation
    UriRef entityAnnotation =
        EnhancementEngineHelper.createEntityEnhancement(graph, engine, contentItemId);
    // first relate this entity annotation to the text annotation(s)
    for (NonLiteral enhancement : relatedEnhancements) {
      graph.add(new TripleImpl(entityAnnotation, DC_RELATION, enhancement));
    }
    UriRef entityUri = new UriRef(rep.getId());
    // add the link to the referred entity
    graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, entityUri));
    // add the label selected above
    graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, literal));
    // the confidence is optional: only written when the suggestion provides a score
    if (suggestion.getScore() != null) {
      graph.add(
          new TripleImpl(
              entityAnnotation,
              ENHANCER_CONFIDENCE,
              literalFactory.createTypedLiteral(suggestion.getScore())));
    }

    // copy all rdf:type references of the entity to the annotation
    Iterator<Reference> types = rep.getReferences(RDF_TYPE.getUnicodeString());
    while (types.hasNext()) {
      graph.add(
          new TripleImpl(
              entityAnnotation, ENHANCER_ENTITY_TYPE, new UriRef(types.next().getReference())));
    }
    // add the name of the ReferencedSite that manages the Entity
    if (suggestion.getEntity().getSite() != null) {
      graph.add(
          new TripleImpl(
              entityAnnotation,
              new UriRef(RdfResourceEnum.site.getUri()),
              new PlainLiteralImpl(suggestion.getEntity().getSite())));
    }

    return entityAnnotation;
  }

  /**
   * Checks whether the language of a label fits the requested language.
   *
   * <p>With no requested language only labels without a language fit. The explicit
   * <code>null</code> check avoids calling {@link String#startsWith(String)} with a
   * <code>null</code> prefix, which would throw a {@link NullPointerException}.
   *
   * @param label the label to check
   * @param lang the requested language or <code>null</code> if none
   * @return <code>true</code> if the label fits the requested language
   */
  private static boolean labelMatchesLanguage(Text label, String lang) {
    String labelLanguage = label.getLanguage();
    if (lang == null) {
      return labelLanguage == null;
    }
    return labelLanguage != null && labelLanguage.startsWith(lang);
  }