Пример #1
0
  /**
   * Produces generic votes that are derived from the detected main subject; usually used when none
   * of the ontology-related annotations exist.
   *
   * <p>Every element returned by {@code findGenericOntologyElements(max)} whose data is not listed
   * in {@code toSkip} receives one vote, scored by the similarity between the key's text and the
   * beautified element name, followed by whatever {@code addAdditionalVotes} contributes for it.
   * Votes for the elements returned by {@code findHangingElements()} are appended afterwards. If
   * any of these steps throws, the stack trace is printed and the votes gathered so far are kept.
   *
   * @param key suggestion key supplying the text that candidates are compared against
   * @param poc POC whose annotation is copied onto every candidate element
   * @param addNone whether a "None" vote is appended at the end of the list
   * @param max passed through to {@code findGenericOntologyElements}
   * @param toSkip concepts whose element data must not be suggested again; may be {@code null}
   * @return the generated votes, never {@code null}
   */
  public List<Vote> generateGenericVotes(
      SuggestionKey key, POC poc, boolean addNone, Integer max, List<SemanticConcept> toSkip) {
    // String form of every element that must not be offered again.
    Set<String> excludedUris = new HashSet<String>();
    if (toSkip != null) {
      for (SemanticConcept skipped : toSkip) {
        excludedUris.add(skipped.getOntologyElement().getData().toString());
      }
    }
    String queryText = key.getText();
    List<Vote> result = new ArrayList<Vote>();
    try {
      List<OntologyElement> genericElements = findGenericOntologyElements(max);
      for (OntologyElement genericElement : genericElements) {
        boolean excluded = excludedUris.contains(genericElement.getData().toString());
        if (!excluded) {
          genericElement.setAnnotation(poc.getAnnotation());

          // Prefer the resource name of the URI; fall back to the full string when the
          // element data cannot be turned into a URI.
          String uriText = ((SerializableURI) genericElement.getData()).toString();
          String shortName;
          try {
            shortName = new SerializableURI(uriText, false).getResourceName();
          } catch (Exception e) {
            shortName = uriText;
          }
          String label = StringUtil.beautifyString(shortName);

          Vote genericVote = new Vote();
          long voteId = incrementer.incrementAndGet();
          genericVote.setId(voteId);

          // The vote carries a clone of a fresh concept wrapping the element.
          SemanticConcept prototype = new SemanticConcept();
          prototype.setOntologyElement(genericElement);
          SemanticConcept candidate = (SemanticConcept) prototype.clone();
          genericVote.setCandidate(candidate);

          double score = similarityCalculator.findSimilarity(queryText, label);
          genericVote.setVote(score);
          result.add(genericVote);
          result.addAll(addAdditionalVotes(genericElement, poc, queryText, shortName, false));
        }
      }

      // Datatype properties with no domain etc. are added on top of the generic elements.
      List<OntologyElement> hanging = findHangingElements();
      result.addAll(
          generateVotesFromOntologyElements(hanging, excludedUris, poc, null, queryText));
    } catch (Exception e1) {
      // TODO Auto-generated catch block
      e1.printStackTrace();
    }

    // Optionally close the list with the "None" option.
    if (addNone) {
      Vote noneVote = VoteGenerator.generateNoneVote(incrementer.incrementAndGet());
      noneVote.getCandidate().getOntologyElement().setAnnotation(poc.getAnnotation());
      result.add(noneVote);
    }
    return result;
  }
Пример #2
0
 /**
  * Finds candidate ontology elements for a datatype property value element.
  *
  * <p>The direct types of every instance URI held by the element's identifier are looked up via
  * {@code luceneAnnotator.findDirectTypes}, the property URIs for each of those types are
  * gathered via {@code findCandidatesForClass}, and the resulting URIs are converted with
  * {@code returnPropertyElements}. The duration of each of the three steps is logged.
  *
  * @param el element whose data is cast to {@code DatatypePropertyValueIdentifier}
  * @param text not used by this method
  * @return the elements produced by {@code returnPropertyElements}; an empty list when that call
  *     throws (the failure is logged)
  */
 private List<OntologyElement> findCandidatesForADTPV(
     DatatypePropertyValueElement el, String text) {
   // Step 1: collect the distinct direct types of all instances.
   long start = System.currentTimeMillis();
   DatatypePropertyValueIdentifier dtpv = (DatatypePropertyValueIdentifier) el.getData();
   List<SerializableURI> instanceUris = dtpv.getInstanceURIs();
   Set<String> allClassUris = new HashSet<String>();
   for (SerializableURI uri : instanceUris) {
     List<String> dTypes = luceneAnnotator.findDirectTypes(uri.toString());
     allClassUris.addAll(dTypes);
   }
   long end = System.currentTimeMillis();
   logger.info(
       "Lucene finished findDirectTypes for "
           + (end - start)
           + "ms and found:"
           + allClassUris.size()
           + " dTypes");

   // Step 2: collect the distinct property URIs for those types.
   List<OntologyElement> elements = new ArrayList<OntologyElement>();
   Set<String> properties = new HashSet<String>();
   start = System.currentTimeMillis();
   for (String classUri : allClassUris) {
     properties.addAll(findCandidatesForClass(classUri));
   }
   end = System.currentTimeMillis();
   logger.info(
       "findCandidatesForClass for "
           + (end - start)
           + "ms and found:"
           + properties.size()
           + " prop uris");

   // Step 3: turn the property URIs into ontology elements. A failure here is logged and an
   // empty result is returned, so the caller can still continue with other candidates.
   start = System.currentTimeMillis();
   try {
     elements.addAll(returnPropertyElements(properties));
   } catch (Exception e) {
     logger.error("returnPropertyElements failed for " + properties.size() + " prop uris", e);
   }
   end = System.currentTimeMillis();
   // Report what this step produced rather than repeating the input count of step 2.
   logger.info(
       "returnPropertyElements for "
           + (end - start)
           + "ms and found:"
           + elements.size()
           + " elements");
   return elements;
 }
Пример #3
0
 /**
  * Creates votes for the given candidate elements.
  *
  * <p>A candidate is skipped when the string form of its data is contained in {@code avoidThese}
  * or when an earlier candidate in the same call had the same URI string. Each remaining
  * candidate gets the POC's annotation (if the POC has one), is passed through
  * {@code addGovernor} when a neighbour is given, and then yields one vote from
  * {@code generateVote} plus whatever {@code addAdditionalVotes} returns for it.
  *
  * @param candidateElements elements to create votes for; {@code null} is treated as empty
  * @param avoidThese string forms of element data that must not be suggested; {@code null} is
  *     treated as empty
  * @param poc POC whose annotation is copied onto the candidates
  * @param neighbour concept whose ontology element is handed to {@code addGovernor}; may be
  *     {@code null}, in which case no governor is added
  * @param text text forwarded to {@code generateVote} and {@code addAdditionalVotes}
  * @return the generated votes in candidate order, never {@code null}
  * @throws Exception if any of the helper calls fails
  */
 List<Vote> generateVotesFromOntologyElements(
     List<OntologyElement> candidateElements,
     Set<String> avoidThese,
     POC poc,
     SemanticConcept neighbour,
     String text)
     throws Exception {
   List<Vote> votes = new ArrayList<Vote>();
   if (candidateElements == null) {
     return votes;
   }
   // A set gives constant-time duplicate detection; the original list made this quadratic.
   Set<String> seenSuggestionUris = new HashSet<String>();
   for (OntologyElement candidate : candidateElements) {
     Object data = candidate.getData();
     // skip adding suggestions which are already in ocs
     if (avoidThese != null && avoidThese.contains(data.toString())) {
       continue;
     }
     String suggestion = ((SerializableURI) data).toString();
     // Set.add returns false when the URI was already handled in this call.
     if (!seenSuggestionUris.add(suggestion)) {
       continue;
     }

     // Prefer the resource name of the URI; fall back to the full string when the suggestion
     // cannot be turned into a URI.
     String pPropertyShortNameOrUri;
     try {
       SerializableURI elementUri = new SerializableURI(suggestion, false);
       pPropertyShortNameOrUri = elementUri.getResourceName();
     } catch (Exception e) {
       pPropertyShortNameOrUri = suggestion;
     }

     if (poc.getAnnotation() != null) {
       candidate.setAnnotation(poc.getAnnotation());
     }
     // check whether the element is datatype property and add the governor
     OntologyElement element = candidate;
     if (neighbour != null) {
       element = addGovernor(candidate, neighbour.getOntologyElement());
     }
     Vote vote = generateVote(text, pPropertyShortNameOrUri, element, null);
     votes.add(vote);
     votes.addAll(addAdditionalVotes(element, poc, text, pPropertyShortNameOrUri, false));
   }
   return votes;
 }
Пример #4
0
  /**
   * Finds clarification options (votes) for the given POC based on the nearest neighbours of the
   * suggestion key.
   *
   * <p>Intended rule, as described by the original author: match the head with some existing
   * (datatype) property of the ontology resource that comes first in the query. Examples from the
   * original notes:
   *
   * <ul>
   *   <li>"what is city population of california?" - head: population, modifier: city
   *   <li>"what is the largest city in california?" - city is governor, largest is dependent; the
   *       suggestion asks whether 'largest' is related to one of the datatype properties that are
   *       related to cities (in california)
   * </ul>
   *
   * <p>Steps performed by this method:
   *
   * <ol>
   *   <li>Collect the string form of every element in {@code toSkip} so it is not suggested again.
   *   <li>For each nearest neighbour, look up candidates with {@code findCandidates}, keep those
   *       that {@code elementAlreadyInSuggestions} does not report as already collected, and
   *       generate votes for them.
   *   <li>For each neighbour whose own ontology element is not among its new candidates, append
   *       the votes returned by {@code addAdditionalVotes} for that element.
   *   <li>Generate votes for the elements from {@code findHangingElements()} that were not
   *       already collected.
   *   <li>Optionally append the "None" vote.
   * </ol>
   *
   * <p>Failures in the lookup and generation steps are logged and the method continues with the
   * votes gathered so far.
   *
   * @param key suggestion key supplying the text and the nearest neighbours; a {@code null}
   *     neighbour list is treated as empty
   * @param poc POC whose annotation is copied onto the candidates
   * @param addNone whether a "None" vote is appended at the end of the list
   * @param toSkip concepts whose elements must not be suggested again; may be {@code null}
   * @return the generated votes, never {@code null}
   */
  public List<Vote> generateVotes(
      SuggestionKey key, POC poc, boolean addNone, List<SemanticConcept> toSkip) {
    // toSkip is optional (see the null check below), so it must not be dereferenced blindly here.
    logger.debug(
        "Before generating suggestions, this many needs to be skipped: "
            + (toSkip == null ? 0 : toSkip.size()));
    long start = System.currentTimeMillis();
    Set<String> avoidThese = new HashSet<String>();
    if (toSkip != null) {
      for (SemanticConcept concept : toSkip) {
        String uri = concept.getOntologyElement().getData().toString();
        avoidThese.add(uri);
      }
    }
    long end = System.currentTimeMillis();
    List<Vote> votes = new ArrayList<Vote>();
    logger.info("Moved OCs to the list to avoid for " + (end - start) + "ms.");
    List<SemanticConcept> neighbours = key.getNearestNeighbours();
    String text = key.getText();
    Integer num = null;
    if (neighbours != null) num = neighbours.size();
    logger.info("Found " + num + " neighbours for " + key.getText());
    // A missing neighbour list is handled like an empty one instead of failing in the loop.
    if (neighbours == null) {
      neighbours = new ArrayList<SemanticConcept>();
    }
    /*
     * this set ensures that a suggestion is added only once: with more than one nearest
     * neighbour the same suggestion can be generated for each neighbour
     */
    Set<OntologyElement> allCandidateElements = new HashSet<OntologyElement>();
    for (SemanticConcept neighbour : neighbours) {
      start = System.currentTimeMillis();
      Set<OntologyElement> candidateElements = new HashSet<OntologyElement>();
      List<OntologyElement> tmpCandidates;
      try {
        tmpCandidates = findCandidates(neighbour, text);

        end = System.currentTimeMillis();
        logger.info("Finding candidates for:" + (end - start) + "ms");
        start = System.currentTimeMillis();
        for (OntologyElement el : tmpCandidates) {
          boolean elementAlreadyInSuggestions =
              elementAlreadyInSuggestions(
                  new ArrayList<OntologyElement>(allCandidateElements), el);
          if (!elementAlreadyInSuggestions) {
            candidateElements.add(el);
            allCandidateElements.add(el);
          } else {
            // Only log a skip when the element really was skipped.
            logger.debug("Skipping, element already added:" + el.getData().toString());
          }
        }

      } catch (Exception e1) {
        logger.error("Finding candidates failed for text:" + text, e1);
      }
      end = System.currentTimeMillis();
      logger.info("Checking whether candidates already added for:" + (end - start) + "ms");
      logger.info(
          "Found "
              + candidateElements.size()
              + " suggestions (function elements not counted) for NN:"
              + neighbour.getOntologyElement().getData().toString()
              + " and text:"
              + text
              + ", total up to now:"
              + allCandidateElements.size()
              + ", total to remove: "
              + avoidThese.size());
      start = System.currentTimeMillis();
      try {
        votes.addAll(
            generateVotesFromOntologyElements(
                new ArrayList<OntologyElement>(candidateElements),
                avoidThese,
                poc,
                neighbour,
                text));
      } catch (Exception e1) {
        logger.error("Generating votes from candidate elements failed for text:" + text, e1);
      }
      end = System.currentTimeMillis();
      logger.info(
          "Generating Ontology Elements for:"
              + (end - start)
              + "ms, "
              + votes.size()
              + " suggestions to be shown.");
      // add the closest element if the poc to be clarified is adjective
      OntologyElement closestOntologyConcept = neighbour.getOntologyElement();
      boolean closestAlreadyInSuggestions =
          elementAlreadyInSuggestions(
              new ArrayList<OntologyElement>(candidateElements), closestOntologyConcept);
      if (!closestAlreadyInSuggestions) {
        String cocSuggestion = closestOntologyConcept.getData().toString();
        // Prefer the resource name of the URI; fall back to the full string (e.g. a literal)
        // when the data cannot be turned into a URI.
        String cocUriOrLiteral;
        try {
          SerializableURI uriUri = new SerializableURI(cocSuggestion, false);
          cocUriOrLiteral = uriUri.getResourceName();
        } catch (Exception e) {
          cocUriOrLiteral = cocSuggestion;
        }
        votes.addAll(addAdditionalVotes(closestOntologyConcept, poc, text, cocUriOrLiteral, true));
      }
    }
    start = System.currentTimeMillis();
    // here we add datatypeProperties with no domain etc.
    List<OntologyElement> hangingElements = new ArrayList<OntologyElement>();
    try {
      hangingElements.addAll(findHangingElements());
    } catch (Exception e1) {
      logger.error("Finding hanging elements failed", e1);
    }
    List<OntologyElement> filteredHangingElements = new ArrayList<OntologyElement>();
    // allCandidateElements no longer changes, so one snapshot serves the whole loop.
    List<OntologyElement> collectedSoFar = new ArrayList<OntologyElement>(allCandidateElements);
    for (OntologyElement el : hangingElements) {
      boolean elementAlreadyInSuggestions = elementAlreadyInSuggestions(collectedSoFar, el);
      if (!elementAlreadyInSuggestions) filteredHangingElements.add(el);
    }
    try {
      votes.addAll(
          generateVotesFromOntologyElements(filteredHangingElements, avoidThese, poc, null, text));
    } catch (Exception e) {
      logger.error("Generating votes from hanging elements failed for text:" + text, e);
    }
    end = System.currentTimeMillis();
    logger.info("Added hanging Elements for:" + (end - start) + "ms");
    // add None element
    if (addNone) {
      Vote vote = VoteGenerator.generateNoneVote(incrementer.incrementAndGet());
      vote.getCandidate().getOntologyElement().setAnnotation(poc.getAnnotation());
      votes.add(vote);
    }
    // NOTE(review): an earlier comment mentioned recalculating scores with wordnet here; no such
    // call exists in this method, the votes are returned as computed above.
    return votes;
  }