/**
 * Builds the list of query strings for an ontology term.
 *
 * <p>The label and synonyms of {@code ontologyTerm} (as returned by {@code
 * getOtLabelAndSynonyms}) are each converted with {@code parseQueryString}. The label and
 * synonyms of every child returned by {@code ontologyService.getChildren} are then appended as
 * boosted queries built by {@code parseBoostQueryString}; the boost is {@code 0.5} raised to the
 * distance between {@code ontologyTerm} and that child.
 *
 * @param ontologyTerm the ontology term whose label, synonyms and children are turned into
 *     queries
 * @return a mutable list holding the term's own queries first, followed by the boosted child
 *     queries
 */
public List<String> parseOntologyTermQueries(OntologyTerm ontologyTerm) {
  // Collect into an explicit ArrayList: Collectors.toList() gives no guarantee that the
  // returned list is mutable, and the child queries are appended to it below.
  List<String> queryTerms =
      getOtLabelAndSynonyms(ontologyTerm)
          .stream()
          .map(term -> parseQueryString(term))
          .collect(Collectors.toCollection(ArrayList::new));

  for (OntologyTerm childOt : ontologyService.getChildren(ontologyTerm)) {
    // The boost is halved for every unit of distance between the term and the child.
    double boostedNumber =
        Math.pow(0.5, ontologyService.getOntologyTermDistance(ontologyTerm, childOt));
    getOtLabelAndSynonyms(childOt)
        .forEach(synonym -> queryTerms.add(parseBoostQueryString(synonym, boostedNumber)));
  }
  return queryTerms;
}
/**
 * Looks up the ontology terms that match a free-text description.
 *
 * @param description text that is reduced to a set of search terms by {@code removeStopWords}
 * @param ontologyIds identifiers of the ontologies passed to the ontology service
 * @return whatever {@code ontologyService.findOntologyTerms} returns for the search terms, with
 *     {@code MAX_NUM_TAGS} passed as its third argument (presumably the result limit)
 */
public List<OntologyTerm> findTags(String description, List<String> ontologyIds) {
  return ontologyService.findOntologyTerms(
      ontologyIds, removeStopWords(description), MAX_NUM_TAGS);
}
/**
 * Tags an attribute whose description consists of non-ASCII characters and expects no hit.
 *
 * <p>The ontology service is stubbed to return {@code ontologyTerms} for the single search term
 * {@code əˈnædrəməs}.
 */
@Test
public void testSearchUnicode() throws InterruptedException, ExecutionException {
  Mockito.reset(ontologyService);
  attribute.setDescription("/əˈnædrəməs/");

  // Search terms the ontology service is stubbed for.
  ImmutableSet<String> stubbedTerms = ImmutableSet.of("əˈnædrəməs");
  when(ontologyService.findOntologyTerms(ontologies, stubbedTerms, 100))
      .thenReturn(ontologyTerms);

  Hit<OntologyTerm> tagHit = semanticSearchService.findTags(attribute, ontologies);

  assertEquals(tagHit, null);
}
/**
 * Tags an attribute described as "Standing height in meters." and expects the {@code
 * standingHeight} term as the hit, with a score of {@code 0.81250f}.
 *
 * <p>The ontology service is stubbed to return {@code ontologyTerms} for the search terms
 * {@code standing}, {@code height} and {@code meters}.
 */
@Test
public void testSearchDescription() throws InterruptedException, ExecutionException {
  Mockito.reset(ontologyService);
  attribute.setDescription("Standing height in meters.");

  // Search terms the ontology service is stubbed for.
  ImmutableSet<String> stubbedTerms = ImmutableSet.of("standing", "height", "meters");
  when(ontologyService.findOntologyTerms(ontologies, stubbedTerms, 100))
      .thenReturn(ontologyTerms);

  Hit<OntologyTerm> tagHit = semanticSearchService.findTags(attribute, ontologies);

  Hit<OntologyTerm> expectedHit = Hit.<OntologyTerm>create(standingHeight, 0.81250f);
  assertEquals(tagHit, expectedHit);
}
/**
 * Tags an attribute described as "History of Hypertension" and expects no hit.
 *
 * <p>The ontology service is stubbed to return {@code ontologyTerms} for the search terms
 * {@code history} and {@code hypertens}.
 */
@Test
public void testSearchHypertension() throws InterruptedException, ExecutionException {
  Mockito.reset(ontologyService);
  attribute.setDescription("History of Hypertension");

  // Search terms the ontology service is stubbed for.
  ImmutableSet<String> stubbedTerms = ImmutableSet.of("history", "hypertens");
  when(ontologyService.findOntologyTerms(ontologies, stubbedTerms, 100))
      .thenReturn(ontologyTerms);

  Hit<OntologyTerm> tagHit = semanticSearchService.findTags(attribute, ontologies);

  assertEquals(tagHit, null);
}
/**
 * Tags an attribute described as "Body mass index" and expects no hit.
 *
 * <p>The ontology service is stubbed to return {@code ontologyTerms} for the search terms
 * {@code body}, {@code mass} and {@code index}.
 */
@Test
public void testSearchMultipleTags() throws InterruptedException, ExecutionException {
  Mockito.reset(ontologyService);
  attribute.setDescription("Body mass index");

  // Search terms the ontology service is stubbed for.
  ImmutableSet<String> stubbedTerms = ImmutableSet.of("body", "mass", "index");
  when(ontologyService.findOntologyTerms(ontologies, stubbedTerms, 100))
      .thenReturn(ontologyTerms);

  Hit<OntologyTerm> tagHit = semanticSearchService.findTags(attribute, ontologies);

  assertEquals(tagHit, null);
}
/**
 * Creates a boolean SHOULD query rule for a composite tag made up of several ontology terms.
 *
 * <p>{@code multiOntologyTermIri} is split on commas. Each IRI is resolved through {@code
 * ontologyService.getOntologyTerm} and contributes one nested rule, built by {@code
 * createDisMaxQueryRuleForTermsWithBoost} from the term's queries ({@link
 * #parseOntologyTermQueries}) and the term frequency of the term's label. IRIs that do not
 * resolve to an ontology term are skipped, matching the null tolerance of {@code
 * collectOntologyTermQueryMap}.
 *
 * @param multiOntologyTermIri comma-separated IRIs of the ontology terms in the composite tag
 * @return a query rule with operator {@code SHOULD} holding one nested rule per resolved term
 */
public QueryRule createShouldQueryRule(String multiOntologyTermIri) {
  QueryRule shouldQueryRule = new QueryRule(new ArrayList<QueryRule>());
  shouldQueryRule.setOperator(Operator.SHOULD);

  for (String ontologyTermIri : multiOntologyTermIri.split(",")) {
    OntologyTerm ontologyTerm = ontologyService.getOntologyTerm(ontologyTermIri);
    if (ontologyTerm == null) {
      // An unresolved IRI has no label or synonyms to query on; skip it rather than fail with
      // a NullPointerException further down.
      continue;
    }
    List<String> queryTerms = parseOntologyTermQueries(ontologyTerm);
    Double termFrequency = termFrequencyService.getTermFrequency(ontologyTerm.getLabel());
    shouldQueryRule
        .getNestedRules()
        .add(createDisMaxQueryRuleForTermsWithBoost(queryTerms, termFrequency));
  }
  return shouldQueryRule;
}
/**
 * Adds the label and synonyms of an ontology term, and of each of its children, to the given
 * map.
 *
 * <p>Every key is the result of {@code stemmer.cleanStemPhrase} applied to a label or synonym;
 * every value is the label of {@code ontologyTerm} itself, also for entries that come from a
 * child term. Nothing happens when {@code ontologyTerm} is {@code null}.
 *
 * @param expanedQueryMap map that receives the entries; existing keys are overwritten
 * @param ontologyTerm the ontology term to collect from, may be {@code null}
 */
public void collectOntologyTermQueryMap(
    Map<String, String> expanedQueryMap, OntologyTerm ontologyTerm) {
  if (ontologyTerm == null) {
    return;
  }

  // Entries for the term itself.
  for (String ownTerm : getOtLabelAndSynonyms(ontologyTerm)) {
    expanedQueryMap.put(stemmer.cleanStemPhrase(ownTerm), ontologyTerm.getLabel());
  }

  // Entries for the children, still mapped to the label of the given term.
  for (OntologyTerm child : ontologyService.getChildren(ontologyTerm)) {
    for (String childTerm : getOtLabelAndSynonyms(child)) {
      expanedQueryMap.put(stemmer.cleanStemPhrase(childTerm), ontologyTerm.getLabel());
    }
  }
}
/**
 * Builds an insertion-ordered map from stemmed phrases to the text they were derived from.
 *
 * <p>First every non-blank query term is added, keyed by {@code stemmer.cleanStemPhrase} of the
 * term and mapped to the term itself. Then each ontology term is added through {@code
 * collectOntologyTermQueryMap}. An ontology term whose IRI contains a comma is treated as a
 * composite: its IRI is split on commas and each part is resolved through {@code
 * ontologyService.getOntologyTerm} before being collected.
 *
 * @param queryTerms the query terms to add; blank ones are ignored
 * @param ontologyTerms the ontology terms whose labels and synonyms are added
 * @return the populated map
 */
public Map<String, String> collectExpandedQueryMap(
    Set<String> queryTerms, Collection<OntologyTerm> ontologyTerms) {
  Map<String, String> expandedQueryMap = new LinkedHashMap<String, String>();

  for (String queryTerm : queryTerms) {
    if (StringUtils.isNotBlank(queryTerm)) {
      expandedQueryMap.put(stemmer.cleanStemPhrase(queryTerm), queryTerm);
    }
  }

  for (OntologyTerm ontologyTerm : ontologyTerms) {
    if (ontologyTerm.getIRI().contains(",")) {
      // Composite term: collect each constituent ontology term separately.
      for (String ontologyTermIri : ontologyTerm.getIRI().split(",")) {
        collectOntologyTermQueryMap(
            expandedQueryMap, ontologyService.getOntologyTerm(ontologyTermIri));
      }
    } else {
      collectOntologyTermQueryMap(expandedQueryMap, ontologyTerm);
    }
  }
  return expandedQueryMap;
}
@Test public void testFindAttributes() { DefaultEntityMetaData sourceEntityMetaData = new DefaultEntityMetaData("sourceEntityMetaData"); EntityMetaData targetEntityMetaData = new DefaultEntityMetaData("targetEntityMetaData"); DefaultAttributeMetaData targetAttribute = new DefaultAttributeMetaData("targetAttribute"); // Mock the id's of the attribute entities that should be searched List<String> attributeIdentifiers = Arrays.asList("1", "2"); when(semanticSearchServiceHelper.getAttributeIdentifiers(sourceEntityMetaData)) .thenReturn(attributeIdentifiers); // Mock the createDisMaxQueryRule method List<QueryRule> rules = new ArrayList<QueryRule>(); QueryRule targetQueryRuleLabel = new QueryRule(AttributeMetaDataMetaData.LABEL, Operator.FUZZY_MATCH, "height"); rules.add(targetQueryRuleLabel); QueryRule targetQueryRuleOntologyTermTag = new QueryRule(AttributeMetaDataMetaData.LABEL, Operator.FUZZY_MATCH, "standing height"); rules.add(targetQueryRuleOntologyTermTag); QueryRule targetQueryRuleOntologyTermTagSyn = new QueryRule(AttributeMetaDataMetaData.LABEL, Operator.FUZZY_MATCH, "length"); rules.add(targetQueryRuleOntologyTermTagSyn); QueryRule disMaxQueryRule = new QueryRule(rules); disMaxQueryRule.setOperator(Operator.DIS_MAX); when(semanticSearchServiceHelper.createDisMaxQueryRuleForAttribute( targetEntityMetaData, targetAttribute)) .thenReturn(disMaxQueryRule); MapEntity entity1 = new MapEntity( ImmutableMap.of( AttributeMetaDataMetaData.NAME, "height_0", AttributeMetaDataMetaData.LABEL, "height", AttributeMetaDataMetaData.DESCRIPTION, "this is a height measurement in m!")); List<Entity> attributeMetaDataEntities = Arrays.<Entity>asList(entity1); List<QueryRule> disMaxQueryRules = Lists.newArrayList( new QueryRule(AttributeMetaDataMetaData.IDENTIFIER, Operator.IN, attributeIdentifiers), new QueryRule(Operator.AND), disMaxQueryRule); AttributeMetaData attributeHeight = new DefaultAttributeMetaData("height_0"); AttributeMetaData attributeWeight = new 
DefaultAttributeMetaData("weight_0"); sourceEntityMetaData.addAttributeMetaData(attributeHeight); sourceEntityMetaData.addAttributeMetaData(attributeWeight); // Case 1 when(dataService.findAll( AttributeMetaDataMetaData.ENTITY_NAME, new QueryImpl(disMaxQueryRules))) .thenReturn(attributeMetaDataEntities); Iterable<AttributeMetaData> termsActual1 = semanticSearchService.findAttributes( sourceEntityMetaData, targetEntityMetaData, targetAttribute); Iterable<AttributeMetaData> termsExpected1 = Arrays.<AttributeMetaData>asList(attributeHeight); assertEquals(termsActual1, termsExpected1); // Case 2 when(dataService.findAll( AttributeMetaDataMetaData.ENTITY_NAME, new QueryImpl(disMaxQueryRules))) .thenReturn(Arrays.<Entity>asList()); Iterable<AttributeMetaData> termsActual2 = semanticSearchService.findAttributes( sourceEntityMetaData, targetEntityMetaData, targetAttribute); Iterable<AttributeMetaData> termsExpected2 = Arrays.<AttributeMetaData>asList(); assertEquals(termsActual2, termsExpected2); Mockito.reset(ontologyService); attribute.setDescription("Standing height (Ångstrøm)"); when(ontologyService.findOntologyTerms( ontologies, ImmutableSet.of("standing", "height", "ångstrøm"), 100)) .thenReturn(ontologyTerms); Hit<OntologyTerm> result = semanticSearchService.findTags(attribute, ontologies); assertEquals(result, Hit.<OntologyTerm>create(standingHeight, 0.76471f)); }