/** * Create a disMaxJunc query rule based on the given search terms as well as the information from * given ontology terms * * @param ontologyTerms * @param searchTerms * @return disMaxJunc queryRule */ public QueryRule createDisMaxQueryRuleForAttribute( Set<String> searchTerms, Collection<OntologyTerm> ontologyTerms) { List<String> queryTerms = new ArrayList<String>(); if (searchTerms != null) { searchTerms .stream() .filter(searchTerm -> StringUtils.isNotBlank(searchTerm)) .forEach(searchTerm -> queryTerms.add(parseQueryString(searchTerm))); } // Handle tags with only one ontologyterm ontologyTerms .stream() .filter(ontologyTerm -> !ontologyTerm.getIRI().contains(",")) .forEach( ot -> { queryTerms.addAll(parseOntologyTermQueries(ot)); }); QueryRule disMaxQueryRule = createDisMaxQueryRuleForTerms(queryTerms); // Handle tags with multiple ontologyterms ontologyTerms .stream() .filter(ontologyTerm -> ontologyTerm.getIRI().contains(",")) .forEach( ot -> { disMaxQueryRule.getNestedRules().add(createShouldQueryRule(ot.getIRI())); }); return disMaxQueryRule; }
/**
 * Builds the disMax query rule for the given terms and, when a usable boost is supplied, stores
 * that boost as the value of the resulting rule.
 *
 * <p>A boost is applied only when it is non-null and its integer part (as returned by {@link
 * Double#intValue()}) is not zero. NOTE(review): this means fractional boosts with a magnitude
 * below 1 (e.g. 0.5) are ignored as well — confirm that this is intended.
 *
 * @param queryTerms terms the disMax rule is built from
 * @param boostValue boost to attach to the rule; may be {@code null}
 * @return the disMax query rule, carrying the boost as its value when one was applied
 */
public QueryRule createDisMaxQueryRuleForTermsWithBoost(
    List<String> queryTerms, Double boostValue) {
  QueryRule boostedRule = createDisMaxQueryRuleForTerms(queryTerms);

  boolean boostIsUnusable = boostValue == null || boostValue.intValue() == 0;
  if (boostIsUnusable) {
    return boostedRule;
  }

  boostedRule.setValue(boostValue);
  return boostedRule;
}
/**
 * Builds a boolean-should query rule for a composite tag, i.e. a tag whose IRI is a
 * comma-separated list of ontology term IRIs.
 *
 * <p>For every IRI in the list the ontology term is looked up through the ontology service, its
 * queries are parsed, and a disMax rule boosted by the term frequency of the term's label is
 * appended to the nested rules of the should-rule.
 *
 * @param multiOntologyTermIri comma-separated ontology term IRIs
 * @return should-rule with one nested boosted disMax rule per ontology term IRI
 */
public QueryRule createShouldQueryRule(String multiOntologyTermIri) {
  QueryRule shouldQueryRule = new QueryRule(new ArrayList<QueryRule>());
  shouldQueryRule.setOperator(Operator.SHOULD);

  String[] ontologyTermIris = multiOntologyTermIri.split(",");
  for (int i = 0; i < ontologyTermIris.length; i++) {
    OntologyTerm term = ontologyService.getOntologyTerm(ontologyTermIris[i]);
    List<String> termQueries = parseOntologyTermQueries(term);
    Double boost = termFrequencyService.getTermFrequency(term.getLabel());

    QueryRule boostedDisMaxRule = createDisMaxQueryRuleForTermsWithBoost(termQueries, boost);
    shouldQueryRule.getNestedRules().add(boostedDisMaxRule);
  }
  return shouldQueryRule;
}
/**
 * Builds a disMax query rule from a list of query terms.
 *
 * <p>Null or empty terms are skipped. Every remaining term is escaped with {@code
 * QueryParser.escape}, passed through {@code reverseEscapeLuceneChar}, and then used for two
 * fuzzy-match rules: one on the attribute label and one on the attribute description.
 *
 * <p>NOTE(review): this method itself neither lower-cases the terms nor removes stop words;
 * presumably callers pre-process the terms — confirm.
 *
 * @param queryTerms query terms to match against label and description
 * @return query rule with operator DIS_MAX wrapping the generated fuzzy-match rules
 */
public QueryRule createDisMaxQueryRuleForTerms(List<String> queryTerms) {
  List<QueryRule> fuzzyRules = new ArrayList<QueryRule>();

  for (String queryTerm : queryTerms) {
    if (StringUtils.isNotEmpty(queryTerm)) {
      String escapedTerm = reverseEscapeLuceneChar(QueryParser.escape(queryTerm));

      QueryRule labelRule =
          new QueryRule(AttributeMetaDataMetaData.LABEL, Operator.FUZZY_MATCH, escapedTerm);
      QueryRule descriptionRule =
          new QueryRule(AttributeMetaDataMetaData.DESCRIPTION, Operator.FUZZY_MATCH, escapedTerm);

      fuzzyRules.add(labelRule);
      fuzzyRules.add(descriptionRule);
    }
  }

  QueryRule disMaxRule = new QueryRule(fuzzyRules);
  disMaxRule.setOperator(Operator.DIS_MAX);
  return disMaxRule;
}
/**
 * Resolves the attribute a query rule refers to.
 *
 * @param queryRule query rule whose field names the attribute
 * @param entityType entity type in which the attribute is looked up
 * @return the attribute of {@code entityType} named by the rule's field
 * @throws MolgenisValidationException if the rule has no field, or if the field does not name an
 *     attribute of {@code entityType}
 */
private static Attribute getQueryRuleAttribute(QueryRule queryRule, EntityType entityType) {
  String fieldName = queryRule.getField();

  if (fieldName != null) {
    Attribute attribute = entityType.getAttribute(fieldName);
    if (attribute != null) {
      return attribute;
    }

    String unknownAttributeMessage =
        format(
            "Query rule field [%s] refers to unknown attribute in entity type [%s]",
            fieldName, entityType.getName());
    throw new MolgenisValidationException(new ConstraintViolation(unknownAttributeMessage));
  }

  String missingFieldMessage =
      format(
          "Query rule with operator [%s] is missing required field",
          queryRule.getOperator().toString());
  throw new MolgenisValidationException(new ConstraintViolation(missingFieldMessage));
}
private void validateQueryRule(QueryRule queryRule, EntityType entityType) { QueryRule.Operator operator = queryRule.getOperator(); switch (operator) { case AND: case NOT: case OR: break; case EQUALS: case FUZZY_MATCH: case FUZZY_MATCH_NGRAM: case GREATER: case GREATER_EQUAL: case LESS: case LESS_EQUAL: case LIKE: { Attribute attr = getQueryRuleAttribute(queryRule, entityType); Object value = toQueryRuleValue(queryRule.getValue(), attr); queryRule.setValue(value); break; } case SEARCH: { Object queryRuleValue = queryRule.getValue(); if (queryRuleValue != null && !(queryRuleValue instanceof String)) { // fix value type queryRule.setValue(queryRuleValue.toString()); } break; } case IN: case RANGE: { Attribute attr = getQueryRuleAttribute(queryRule, entityType); Object queryRuleValue = queryRule.getValue(); if (queryRuleValue != null) { if (!(queryRuleValue instanceof Iterable<?>)) { throw new MolgenisValidationException( new ConstraintViolation( format( "Query rule with operator [%s] value is of type [%s] instead of [Iterable]", operator, queryRuleValue.getClass().getSimpleName()))); } // fix value types Iterable<?> queryRuleValues = (Iterable<?>) queryRuleValue; List<Object> values = stream(queryRuleValues.spliterator(), false) .map(value -> toQueryRuleValue(value, attr)) .collect(toList()); queryRule.setValue(values); } break; } case DIS_MAX: case NESTED: case SHOULD: queryRule .getNestedRules() .forEach(nestedQueryRule -> validateQueryRule(nestedQueryRule, entityType)); break; default: throw new RuntimeException(format("Unknown query operator [%s]", operator.toString())); } }
@Test public void testFindAttributes() { DefaultEntityMetaData sourceEntityMetaData = new DefaultEntityMetaData("sourceEntityMetaData"); EntityMetaData targetEntityMetaData = new DefaultEntityMetaData("targetEntityMetaData"); DefaultAttributeMetaData targetAttribute = new DefaultAttributeMetaData("targetAttribute"); // Mock the id's of the attribute entities that should be searched List<String> attributeIdentifiers = Arrays.asList("1", "2"); when(semanticSearchServiceHelper.getAttributeIdentifiers(sourceEntityMetaData)) .thenReturn(attributeIdentifiers); // Mock the createDisMaxQueryRule method List<QueryRule> rules = new ArrayList<QueryRule>(); QueryRule targetQueryRuleLabel = new QueryRule(AttributeMetaDataMetaData.LABEL, Operator.FUZZY_MATCH, "height"); rules.add(targetQueryRuleLabel); QueryRule targetQueryRuleOntologyTermTag = new QueryRule(AttributeMetaDataMetaData.LABEL, Operator.FUZZY_MATCH, "standing height"); rules.add(targetQueryRuleOntologyTermTag); QueryRule targetQueryRuleOntologyTermTagSyn = new QueryRule(AttributeMetaDataMetaData.LABEL, Operator.FUZZY_MATCH, "length"); rules.add(targetQueryRuleOntologyTermTagSyn); QueryRule disMaxQueryRule = new QueryRule(rules); disMaxQueryRule.setOperator(Operator.DIS_MAX); when(semanticSearchServiceHelper.createDisMaxQueryRuleForAttribute( targetEntityMetaData, targetAttribute)) .thenReturn(disMaxQueryRule); MapEntity entity1 = new MapEntity( ImmutableMap.of( AttributeMetaDataMetaData.NAME, "height_0", AttributeMetaDataMetaData.LABEL, "height", AttributeMetaDataMetaData.DESCRIPTION, "this is a height measurement in m!")); List<Entity> attributeMetaDataEntities = Arrays.<Entity>asList(entity1); List<QueryRule> disMaxQueryRules = Lists.newArrayList( new QueryRule(AttributeMetaDataMetaData.IDENTIFIER, Operator.IN, attributeIdentifiers), new QueryRule(Operator.AND), disMaxQueryRule); AttributeMetaData attributeHeight = new DefaultAttributeMetaData("height_0"); AttributeMetaData attributeWeight = new 
DefaultAttributeMetaData("weight_0"); sourceEntityMetaData.addAttributeMetaData(attributeHeight); sourceEntityMetaData.addAttributeMetaData(attributeWeight); // Case 1 when(dataService.findAll( AttributeMetaDataMetaData.ENTITY_NAME, new QueryImpl(disMaxQueryRules))) .thenReturn(attributeMetaDataEntities); Iterable<AttributeMetaData> termsActual1 = semanticSearchService.findAttributes( sourceEntityMetaData, targetEntityMetaData, targetAttribute); Iterable<AttributeMetaData> termsExpected1 = Arrays.<AttributeMetaData>asList(attributeHeight); assertEquals(termsActual1, termsExpected1); // Case 2 when(dataService.findAll( AttributeMetaDataMetaData.ENTITY_NAME, new QueryImpl(disMaxQueryRules))) .thenReturn(Arrays.<Entity>asList()); Iterable<AttributeMetaData> termsActual2 = semanticSearchService.findAttributes( sourceEntityMetaData, targetEntityMetaData, targetAttribute); Iterable<AttributeMetaData> termsExpected2 = Arrays.<AttributeMetaData>asList(); assertEquals(termsActual2, termsExpected2); Mockito.reset(ontologyService); attribute.setDescription("Standing height (Ångstrøm)"); when(ontologyService.findOntologyTerms( ontologies, ImmutableSet.of("standing", "height", "ångstrøm"), 100)) .thenReturn(ontologyTerms); Hit<OntologyTerm> result = semanticSearchService.findTags(attribute, ontologies); assertEquals(result, Hit.<OntologyTerm>create(standingHeight, 0.76471f)); }