/**
 * Gathers the property URIs that {@code luceneAnnotator} reports for the given class, both where
 * the class is a domain and where it is a range, and keeps only those for which
 * {@code isInIgnoreNameSpaceList} returns {@code false}.
 *
 * @param classUri URI of the class whose properties are looked up
 * @return a newly created set holding the accepted property URIs
 */
public Set<String> findCandidatesForClass(String classUri) {
  // Union of the "class is a domain" and "class is a range" lookups.
  Set<String> domainAndRangeProperties = new HashSet<String>();
  domainAndRangeProperties.addAll(
      luceneAnnotator.getDefinedPropertiesWhereClassIsADomain(classUri, forceSuperClasses));
  domainAndRangeProperties.addAll(
      luceneAnnotator.getDefinedPropertiesWhereClassIsARange(classUri, forceSuperClasses));

  Set<String> accepted = new HashSet<String>();
  for (String propertyUri : domainAndRangeProperties) {
    if (isInIgnoreNameSpaceList(propertyUri)) {
      continue;
    }
    accepted.add(propertyUri);
  }
  return accepted;
}
/** * @param dTypes * @return */ List<String> filterDirectTypes(List<String> dTypes) { List<String> filteredList = new ArrayList(); List<String> toRemove = new ArrayList(); if (dTypes != null && dTypes.size() > 1) { // Map<String, Set<String>> subClasses = ontology2Map.getSubClasses(); List<String> copyOfDtypes = new ArrayList(); copyOfDtypes.addAll(dTypes); for (String classUri : dTypes) { Set<String> subClassesOfThis = luceneAnnotator.findSubClasses(classUri); // subClasses.get(classUri); for (String uri : copyOfDtypes) { if (subClassesOfThis.contains(uri) && !classUri.equals(uri)) { // remove this toRemove.add(uri); } } // classUri subclass DTypes[i]; ostaje // classUri superclass DTypes[i]; remove classUri // OVO NE TREBA DA BUDE OVDE; PROBAJ DA TO UBACIS TAMO KAD TRAZIS // SUGGESTIONS ILI RAZMISLI GDE JE VEC NAJBOLJE } } filteredList.addAll(dTypes); filteredList.removeAll(toRemove); return filteredList; }
/** * finds elements which are the top most in the ontology... * * @return */ private List<OntologyElement> findGenericOntologyElements(Integer max) throws Exception { List<OntologyElement> elements = new ArrayList<OntologyElement>(); Set<String> suggestions = luceneAnnotator.findPropertyURIs(max); // now remove all that are after max List<String> list = new ArrayList<String>(suggestions); if (suggestions != null && suggestions.size() > max) { for (int i = max; i < suggestions.size(); i++) { String prop = list.get(i); suggestions.remove(prop); } } elements.addAll(returnPropertyElements(suggestions)); long start = System.currentTimeMillis(); Set<String> classUris = luceneAnnotator.findTopClasses(); long end = System.currentTimeMillis(); logger.info("Finished searching lucene for top classes for:" + (end - start) + "ms."); if (classUris == null) classUris = new HashSet(); logger.info("Found " + classUris.size() + " top classes."); // now remove all that are after max List<String> newList = new ArrayList<String>(classUris); if (newList.size() > max) { for (int i = max; i < newList.size(); i++) { String classUri = newList.get(i); classUris.remove(classUri); } } for (String euri : classUris) { SerializableURI elementUri = null; try { elementUri = new SerializableURI(euri, false); } catch (Exception e) { e.printStackTrace(); } OntologyElement e = new ClassElement(); e.setData(elementUri); elements.add(e); } return elements; }
/**
 * Builds the list of "hanging" elements. The property part consists of {@code RDFS.LABEL},
 * {@code RDF.TYPE}, the entries parsed from
 * {@code suggestionsHelper.getDatatypePropertiesNoDomain()} and the properties that
 * {@code luceneAnnotator} reports for {@code owl:Thing} as a domain or as a range. A single class
 * element for {@code owl:Thing} is appended at the end.
 *
 * @return property elements followed by the {@code owl:Thing} class element
 * @throws Exception propagated from the element-building helpers
 */
List<OntologyElement> findHangingElements() throws Exception {
  final String owlThing = "http://www.w3.org/2002/07/owl#Thing";

  // generic properties first, then everything without a specific domain/range
  Set<String> hangingProperties = new HashSet<String>();
  hangingProperties.add(RDFS.LABEL.toString());
  hangingProperties.add(RDF.TYPE.toString());
  String noDomainTable = suggestionsHelper.getDatatypePropertiesNoDomain();
  hangingProperties.addAll(StringUtil.fromStringToSet(noDomainTable));
  hangingProperties.addAll(
      luceneAnnotator.getDefinedPropertiesWhereClassIsADomain(owlThing, forceSuperClasses));
  hangingProperties.addAll(
      luceneAnnotator.getDefinedPropertiesWhereClassIsARange(owlThing, forceSuperClasses));

  List<OntologyElement> result =
      new ArrayList<OntologyElement>(returnPropertyElements(hangingProperties));
  logger.info("Adding:" + hangingProperties.size() + " hanging properties");

  Set<String> thingOnly = new HashSet<String>();
  thingOnly.add(owlThing);
  result.addAll(returnClassElements(thingOnly));
  return result;
}
private List<OntologyElement> findCandidatesForADTPV( DatatypePropertyValueElement el, String text) { // Set<String> allSuggestions = new HashSet<String>(); long start = System.currentTimeMillis(); DatatypePropertyValueIdentifier dtpv = (DatatypePropertyValueIdentifier) el.getData(); List<SerializableURI> instanceUris = dtpv.getInstanceURIs(); Set<String> allClassUris = new HashSet<String>(); for (SerializableURI uri : instanceUris) { List<String> dTypes = luceneAnnotator.findDirectTypes(uri.toString()); allClassUris.addAll(dTypes); } long end = System.currentTimeMillis(); logger.info( "Lucene finished findDirectTypes for " + (end - start) + "ms and found:" + allClassUris.size() + " dTypes"); List<OntologyElement> elements = new ArrayList(); Set<String> properties = new HashSet(); start = System.currentTimeMillis(); for (String classUri : allClassUris) { properties.addAll(findCandidatesForClass(classUri)); } end = System.currentTimeMillis(); logger.info( "findCandidatesForClass for " + (end - start) + "ms and found:" + properties.size() + " prop uris"); start = System.currentTimeMillis(); try { elements.addAll(returnPropertyElements(properties)); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } end = System.currentTimeMillis(); logger.info( "returnPropertyElements for " + (end - start) + "ms and found:" + properties.size() + " prop uris"); return elements; }
/** * if element is datatype property then add min, max and sum as options * * @param element * @param poc * @return */ List<Vote> addAdditionalVotes( OntologyElement element, POC poc, String text, String propertyUri, boolean alreadyAdded) { // if element is datatype property add suggestions with min, // max, sum List<Vote> votes = new ArrayList<Vote>(); // logger.info("Adding additional votes(min,max,etc), poc is:"+poc.toString()); if (element instanceof PropertyElement) { PropertyElement clonedElement = new PropertyElement(); String thisPropertyUri = ((PropertyElement) element).getData().toString(); // logger.info("thisPropertyUri "+thisPropertyUri); if (luceneAnnotator.isItDatatypeProperty(thisPropertyUri)) { ((PropertyElement) clonedElement).setDatatypeProperty(true); boolean containsJJ = false; containsJJ = treeUtils.pocContainsJJs(poc); // logger.info("Checking whether it contains JJ*..." + containsJJ); if (containsJJ) { if (alreadyAdded) clonedElement.setAlreadyAdded(true); else clonedElement.setAlreadyAdded(false); clonedElement.setAnnotation(element.getAnnotation()); clonedElement.setData(element.getData()); if (((PropertyElement) element).getRange() != null) clonedElement.setRange(((PropertyElement) element).getRange()); if (((PropertyElement) element).getDomain() != null) clonedElement.setDomain(((PropertyElement) element).getDomain()); if (((PropertyElement) element).getGovernor() != null) clonedElement.setGovernor(((PropertyElement) element).getGovernor()); Vote vote = generateVote(text, propertyUri, clonedElement, FreyaConstants.MAX_FUNCTION); votes.add(vote); vote = generateVote(text, propertyUri, clonedElement, FreyaConstants.MIN_FUNCTION); votes.add(vote); vote = generateVote(text, propertyUri, clonedElement, FreyaConstants.SUM_FUNCTION); votes.add(vote); vote = generateVote(text, propertyUri, clonedElement, FreyaConstants.AVG_FUNCTION); votes.add(vote); } } } return votes; }
/** * generate property elements from the set of property uris, and check whether these properties * are datatype so that additional flag can be added to the property elements * * @param classes * @return */ List<OntologyElement> returnPropertyElements(Set<String> properties) throws Exception { if (properties == null) return new ArrayList(); List<OntologyElement> elements = new ArrayList<OntologyElement>(); for (String euri : properties) { // logger.info("Checking if datatypeproperty=true for: " + euri); SerializableURI elementUri = null; try { elementUri = new SerializableURI(euri, false); } catch (Exception e) { e.printStackTrace(); } OntologyElement e = new PropertyElement(); // so that later we can distinguish property elements if they refer // to datatypes if (luceneAnnotator.isItDatatypeProperty(euri)) { ((PropertyElement) e).setDatatypeProperty(true); } e.setData(elementUri); elements.add(((PropertyElement) e)); } return elements; }
/** * checks whether the element is property and adds governor if yes * * @param pElement * @param governor * @return */ OntologyElement addGovernor(OntologyElement pElement, OntologyElement governor) { PropertyElement property = null; if (pElement instanceof PropertyElement) { property = (PropertyElement) pElement; // logger.debug("Property found:" + property.getData().toString() // + "...is it datatype?" + property.isDatatypeProperty()); if (property.isDatatypeProperty()) { // set governor for that element to be the // neighbour.getOntologyElement() property.setGovernor(governor); // logger.debug("Setting governor:" + governor.toString() // + " for element: " + property.getData().toString()); String uri = ((SerializableURI) property.getData()).toString(); Set<String> range = luceneAnnotator.findPropertyRange(uri); String firstOne = null; if (range != null && range.size() > 0) { firstOne = (String) (new ArrayList(range)).get(0); property.setRange(firstOne); } } } if (property == null) return pElement; else return property; }
/** * find candidate ontology elements * * @return */ private List<OntologyElement> findCandidates(SemanticConcept sc, String text) throws Exception { // Set<String> allSuggestions = new HashSet<String>(); OntologyElement el = sc.getOntologyElement(); // what is the population of california? // what is the population of cities in california? // what is the highest point of california? // String uri = null; Set<String> classUris = new HashSet<String>(); if (el instanceof InstanceElement) { List<String> allClassUris = ((InstanceElement) el).getClassURIList(); logger.info("There were:" + allClassUris.size() + " direct types"); // classUris = filterDirectTypes(allClassUris); classUris = new HashSet(allClassUris); logger.info("There is NO FILTERING of direct types..."); } else if (el instanceof InstanceListElement) { List allClassUris = ((InstanceListElement) el).getClassURIList(); logger.info("There were:" + allClassUris.size() + " direct types"); // classUris = filterDirectTypes(allClassUris); classUris = new HashSet(allClassUris); logger.info("There is NO FILTERING of direct types..."); } else if (el instanceof ClassElement) { String uri = ((SerializableURI) el.getData()).toString(); classUris.add(uri); logger.info("NN is class" + uri); } else if (el instanceof DatatypePropertyValueElement) { logger.info("NN is DPVE " + el.getData()); return findCandidatesForADTPV(((DatatypePropertyValueElement) el), text); } else if (el instanceof PropertyElement) { logger.info("NN is property " + el.getData()); return findCandidatesForAProperty(((PropertyElement) el), text); } Set<OntologyElement> elements = new HashSet<OntologyElement>(); Set<String> listOfPotentialCandidates = new HashSet<String>(); Set<String> classes = new HashSet<String>(); long start = System.currentTimeMillis(); for (String uri : classUris) { // //////////properties first////////////////////////// listOfPotentialCandidates = findCandidatesForClass(uri); 
elements.addAll(returnPropertyElements(listOfPotentialCandidates)); // allSuggestions.addAll(listOfPotentialCandidates); } logger.info("Found " + elements.size() + " elements so far."); // //////////////////////////////////// for (String uri : classUris) { classes.addAll( luceneAnnotator.getNeighbouringClassesWhereGivenClassIsADomain(uri, forceSuperClasses)); logger.info("Found " + classes.size() + " class candidates from IsADomain method."); classes.addAll( luceneAnnotator.getNeighbouringClassesWhereGivenClassIsARange(uri, forceSuperClasses)); logger.info( "Found more, now total: " + classes.size() + " class candidates from IsADomain and isARange method."); } Set<String> filteredClasses = new HashSet<String>(); for (String t : classes) { if (!isInIgnoreNameSpaceList(t)) { filteredClasses.add(t); } } elements.addAll(returnClassElements(filteredClasses)); long end = System.currentTimeMillis(); logger.info( "Found " + elements.size() + " cadidates for " + classUris.size() + " classes for " + (end - start) + "ms."); return new ArrayList(elements); }
/** * @param property * @param text * @return */ private List<OntologyElement> findCandidatesForAProperty(PropertyElement property, String text) throws Exception { // Set<String> allSuggestions = new HashSet<String>(); String uri = ((SerializableURI) property.getData()).toString(); logger.debug("Finding candidates for property:" + property.getData().toString()); // all elements List<OntologyElement> elements = new ArrayList<OntologyElement>(); // feed classes: these are used to find more suggestions but they are also // added to the list! Set<String> feedClasses = new HashSet<String>(); Set<String> properties = new HashSet<String>(); Set<String> allClasses = new HashSet<String>(); Set<String> rangeClasses = luceneAnnotator.findPropertyRange(uri); feedClasses.addAll(rangeClasses); logger.debug("getRangeClassesForProperty:" + uri + " found " + rangeClasses.size()); Set<String> domainClasses = luceneAnnotator.findPropertyDomain(uri); feedClasses.addAll(domainClasses); logger.debug("getDomainClassesForProperty:" + uri + " found " + domainClasses.size()); if (feedClasses == null || feedClasses.size() <= 1) { // find top classes logger.debug("Number of feed classes is 0....forceSuggestions=" + forceSuggestions); if (new Boolean(forceSuggestions).booleanValue() == true) { feedClasses.addAll(luceneAnnotator.findClassURIs()); } } // a onda za te classes nadji properties where class is a domain/range for (String classUri : feedClasses) { properties.addAll( luceneAnnotator.getDefinedPropertiesWhereClassIsADomain(classUri, forceSuperClasses)); properties.addAll( luceneAnnotator.getDefinedPropertiesWhereClassIsARange(classUri, forceSuperClasses)); } // if (new Boolean(forceSuggestions).booleanValue() == true) { logger.info( "cheating!!!!!!!!!!!!!!!!!!!!!!!!!!! 
here you should not get ALL properties but only relevant ones"); Set<String> datatypePropertiesList = luceneAnnotator.findDatatypePropertyURIs(); Set<String> objectPropertiesList = luceneAnnotator.findObjectPropertyURIs(); Set<String> rdfPropertiesList = luceneAnnotator.findRDFPropertyURIs(null); properties.addAll(datatypePropertiesList); properties.addAll(objectPropertiesList); properties.addAll(rdfPropertiesList); } Set<String> filteredProperties = new HashSet<String>(); for (String t : properties) { if (!isInIgnoreNameSpaceList(t)) { filteredProperties.add(t); } } // add datatype properties which do not have domain defined: mb endDate // beginDate // String table = getSparqlUtils().getDatatypePropertiesNoDomain(); // properties.addAll(gate.freya.util.StringUtil.fromStringToSet(table)); elements.addAll(returnPropertyElements(filteredProperties)); for (String classUri : feedClasses) { allClasses.addAll( luceneAnnotator.getNeighbouringClassesWhereGivenClassIsADomain( classUri, forceSuperClasses)); allClasses.addAll( luceneAnnotator.getNeighbouringClassesWhereGivenClassIsARange( classUri, forceSuperClasses)); } Set<String> filteredFeedClasses = new HashSet<String>(); for (String t : feedClasses) { if (!isInIgnoreNameSpaceList(t)) { filteredFeedClasses.add(t); } } allClasses.addAll(filteredFeedClasses); elements.addAll(returnClassElements(allClasses)); return elements; }