예제 #1
0
 /**
  * Creates a vote for a single ontology element, scored by how similar {@code text} is to the
  * beautified form of the given property name.
  *
  * <p>If the similarity calculation throws, the stack trace is printed and the vote is returned
  * without {@code setVote} having been called on it.
  *
  * @param text text that is compared against the beautified property label
  * @param pPropertyShortNameOrUri property short name or URI; it is run through {@code
  *     StringUtil.beautifyString} before the comparison
  * @param element ontology element placed into the vote's candidate concept
  * @param function function set on the candidate concept
  * @return the new vote, carrying a fresh id from {@code incrementer} and the candidate concept
  */
 public Vote generateVote(
     String text, String pPropertyShortNameOrUri, OntologyElement element, String function) {
   final String readableLabel = StringUtil.beautifyString(pPropertyShortNameOrUri);

   final Vote result = new Vote();
   final long voteId = incrementer.incrementAndGet();
   result.setId(voteId);

   // The candidate wraps the element as-is (no clone) together with the requested function.
   final SemanticConcept candidate = new SemanticConcept();
   candidate.setOntologyElement(element);
   candidate.setFunction(function);
   result.setCandidate(candidate);

   try {
     final double score = similarityCalculator.findSimilarity(text, readableLabel);
     result.setVote(score);
   } catch (Exception e) {
     e.printStackTrace();
   }
   return result;
 }
예제 #2
0
  /**
   * Finds potential options based on the detected main subject; it is usually used when none of
   * the ontology-related annotations exist.
   *
   * <p>Every element returned by {@code findGenericOntologyElements(max)} that is not listed in
   * {@code toSkip} gets the annotation of {@code poc}, is turned into a vote scored by the
   * similarity between the key text and the element's beautified short name, and is followed by
   * the votes from {@code addAdditionalVotes}. Votes for the hanging elements (datatype
   * properties with no domain etc.) are appended afterwards. Any exception raised during these
   * steps is printed and ends the generation early; the votes collected so far are still
   * returned.
   *
   * @param key suggestion key whose text is compared against the element labels
   * @param poc POC whose annotation is copied onto each element (and onto the None element)
   * @param addNone whether a "None" vote is appended at the end
   * @param max passed through to {@code findGenericOntologyElements}
   * @param toSkip concepts whose ontology elements must not be suggested; may be {@code null}
   * @return the generated votes, never {@code null}
   */
  public List<Vote> generateGenericVotes(
      SuggestionKey key, POC poc, boolean addNone, Integer max, List<SemanticConcept> toSkip) {
    // String forms of the elements that must not be offered again.
    Set<String> avoidThese = new HashSet<String>();
    if (toSkip != null) {
      for (SemanticConcept concept : toSkip) {
        avoidThese.add(concept.getOntologyElement().getData().toString());
      }
    }
    String text = key.getText();
    List<Vote> votes = new ArrayList<Vote>();
    try {
      List<OntologyElement> elements = findGenericOntologyElements(max);

      for (OntologyElement element : elements) {
        // No cast to SerializableURI here: toString() is all that is needed, and a cast would
        // throw for non-URI data and abort the whole generation instead of reaching the
        // literal fallback below.
        String suggestion = element.getData().toString();
        if (avoidThese.contains(suggestion)) {
          continue;
        }
        element.setAnnotation(poc.getAnnotation());

        // Prefer the resource name of the URI; fall back to the raw string when the data cannot
        // be parsed as a URI.
        String pPropertyShortName;
        try {
          SerializableURI elementUri = new SerializableURI(suggestion, false);
          pPropertyShortName = elementUri.getResourceName();
        } catch (Exception e) {
          pPropertyShortName = suggestion;
        }
        String niceLabel = StringUtil.beautifyString(pPropertyShortName);

        Vote vote = new Vote();
        long id = incrementer.incrementAndGet();
        vote.setId(id);
        SemanticConcept candidateSemanticConcept = new SemanticConcept();
        candidateSemanticConcept.setOntologyElement(element);
        // The vote receives a clone of the concept rather than the concept itself.
        SemanticConcept clonedConcept = (SemanticConcept) candidateSemanticConcept.clone();
        vote.setCandidate(clonedConcept);
        double totalSimilarity = similarityCalculator.findSimilarity(text, niceLabel);
        vote.setVote(totalSimilarity);
        votes.add(vote);
        votes.addAll(addAdditionalVotes(element, poc, text, pPropertyShortName, false));
      }

      // here we add datatypeProperties with no domain etc.
      votes.addAll(
          generateVotesFromOntologyElements(findHangingElements(), avoidThese, poc, null, text));
    } catch (Exception e1) {
      // Best effort: keep whatever votes were collected before the failure.
      e1.printStackTrace();
    }
    // add None element
    if (addNone) {
      Vote vote = VoteGenerator.generateNoneVote(incrementer.incrementAndGet());
      vote.getCandidate().getOntologyElement().setAnnotation(poc.getAnnotation());
      votes.add(vote);
    }
    return votes;
  }
예제 #3
0
  /**
   * Generates the clarification votes for the given suggestion key.
   *
   * <p>For every nearest neighbour of {@code key}, candidate ontology elements are looked up via
   * {@code findCandidates}. Candidates already collected for an earlier neighbour are dropped, so
   * the same suggestion is not generated once per neighbour, and the rest are turned into votes.
   * Additional votes are then added for the neighbour's own ontology element unless it is already
   * among that neighbour's candidates. Finally, votes for the hanging elements (datatype
   * properties with no domain etc.) that were not already collected are appended, plus an
   * optional "None" vote.
   *
   * <p>Original author's notes on the intended rules: match the head with some existing
   * (datatype) property of the OntoRes which is first in the query, if there is such an OntoRes.
   * Example: "what is city population of california?" (head: population, modifier: city).
   * Example: "what is the largest city in california?" (city is governor, largest is dependent),
   * which leads to asking whether 'largest' is related to one of the datatype properties that are
   * related to cities (in california). Two methods were unified into this one on 27.02.2010.
   *
   * <p>Exceptions raised by the lookup and vote-generation helpers are logged and the affected
   * step is skipped; the method still returns the votes gathered by the other steps.
   *
   * @param key suggestion key supplying the text and the nearest neighbours; a {@code null}
   *     neighbour list is treated as empty
   * @param poc POC passed to the vote helpers; its annotation is set on the None element
   * @param addNone whether a "None" vote is appended at the end
   * @param toSkip concepts whose ontology elements must not be suggested; may be {@code null}
   * @return the generated votes, never {@code null}
   */
  public List<Vote> generateVotes(
      SuggestionKey key, POC poc, boolean addNone, List<SemanticConcept> toSkip) {
    // toSkip is allowed to be null (see the guard below), so it must not be dereferenced here.
    logger.debug(
        "Before generating suggestions, this many needs to be skipped: "
            + (toSkip == null ? 0 : toSkip.size()));
    long start = System.currentTimeMillis();
    Set<String> avoidThese = new HashSet<String>();
    if (toSkip != null) {
      for (SemanticConcept concept : toSkip) {
        avoidThese.add(concept.getOntologyElement().getData().toString());
      }
    }
    long end = System.currentTimeMillis();
    List<Vote> votes = new ArrayList<Vote>();
    logger.info("Moved OCs to the list to avoid for " + (end - start) + "ms.");

    List<SemanticConcept> neighbours = key.getNearestNeighbours();
    String text = key.getText();
    Integer num = null;
    if (neighbours != null) {
      num = neighbours.size();
    }
    logger.info("Found " + num + " neighbours for " + text);
    if (neighbours == null) {
      // Without neighbours only the hanging elements and the optional None vote are produced.
      neighbours = new ArrayList<SemanticConcept>();
    }

    /*
     * This set ensures that a suggestion is added only once: with more than one nearest
     * neighbour the same suggestion could otherwise be generated for each neighbour.
     */
    Set<OntologyElement> allCandidateElements = new HashSet<OntologyElement>();
    for (SemanticConcept neighbour : neighbours) {
      start = System.currentTimeMillis();
      Set<OntologyElement> candidateElements = new HashSet<OntologyElement>();
      try {
        List<OntologyElement> tmpCandidates = findCandidates(neighbour, text);

        end = System.currentTimeMillis();
        logger.info("Finding candidates for:" + (end - start) + "ms");
        start = System.currentTimeMillis();
        for (OntologyElement el : tmpCandidates) {
          // The snapshot is rebuilt per element because allCandidateElements grows in this loop.
          boolean elementAlreadyInSuggestions =
              elementAlreadyInSuggestions(
                  new ArrayList<OntologyElement>(allCandidateElements), el);
          if (!elementAlreadyInSuggestions) {
            candidateElements.add(el);
            allCandidateElements.add(el);
          } else {
            logger.debug("Skipping, element already added:" + el.getData().toString());
          }
        }
      } catch (Exception e1) {
        // Best effort: continue with whatever candidates were collected for this neighbour.
        logger.error("Failed to find candidates for a neighbour of text: " + text, e1);
      }
      end = System.currentTimeMillis();
      logger.info("Checking whether candidates already added for:" + (end - start) + "ms");
      logger.info(
          "Found "
              + candidateElements.size()
              + " suggestions (function elements not counted) for NN:"
              + neighbour.getOntologyElement().getData().toString()
              + " and text:"
              + text
              + ", total up to now:"
              + allCandidateElements.size()
              + ", total to remove: "
              + avoidThese.size());
      start = System.currentTimeMillis();
      try {
        votes.addAll(
            generateVotesFromOntologyElements(
                new ArrayList<OntologyElement>(candidateElements),
                avoidThese,
                poc,
                neighbour,
                text));
      } catch (Exception e1) {
        logger.error("Failed to generate votes from candidate elements for text: " + text, e1);
      }
      end = System.currentTimeMillis();
      logger.info(
          "Generating Ontology Elements for:"
              + (end - start)
              + "ms, "
              + votes.size()
              + " suggestions to be shown.");

      // Add the closest element itself (original note: for the case where the POC to be
      // clarified is an adjective), unless it is already among this neighbour's candidates.
      OntologyElement closestOntologyConcept = neighbour.getOntologyElement();
      boolean closestAlreadySuggested =
          elementAlreadyInSuggestions(
              new ArrayList<OntologyElement>(candidateElements), closestOntologyConcept);
      if (!closestAlreadySuggested) {
        String cocSuggestion = closestOntologyConcept.getData().toString();
        // Prefer the resource name of the URI; fall back to the raw string when the data cannot
        // be parsed as a URI.
        String cocUriOrLiteral;
        try {
          SerializableURI uriUri = new SerializableURI(cocSuggestion, false);
          cocUriOrLiteral = uriUri.getResourceName();
        } catch (Exception e) {
          cocUriOrLiteral = cocSuggestion;
        }
        votes.addAll(addAdditionalVotes(closestOntologyConcept, poc, text, cocUriOrLiteral, true));
      }
    }

    start = System.currentTimeMillis();
    // here we add datatypeProperties with no domain etc.
    List<OntologyElement> hangingElements = new ArrayList<OntologyElement>();
    try {
      hangingElements.addAll(findHangingElements());
    } catch (Exception e1) {
      logger.error("Failed to find hanging elements", e1);
    }
    List<OntologyElement> filteredHangingElements = new ArrayList<OntologyElement>();
    for (OntologyElement el : hangingElements) {
      boolean elementAlreadyInSuggestions =
          elementAlreadyInSuggestions(new ArrayList<OntologyElement>(allCandidateElements), el);
      if (!elementAlreadyInSuggestions) {
        filteredHangingElements.add(el);
      }
    }
    try {
      votes.addAll(
          generateVotesFromOntologyElements(filteredHangingElements, avoidThese, poc, null, text));
    } catch (Exception e) {
      logger.error("Failed to generate votes from hanging elements for text: " + text, e);
    }
    end = System.currentTimeMillis();
    logger.info("Added hanging Elements for:" + (end - start) + "ms");

    // add None element
    if (addNone) {
      Vote vote = VoteGenerator.generateNoneVote(incrementer.incrementAndGet());
      vote.getCandidate().getOntologyElement().setAnnotation(poc.getAnnotation());
      votes.add(vote);
    }
    // NOTE: the original comment here mentions calling WordNet to recalculate scores; no such
    // step is performed in this method.
    return votes;
  }