/**
 * Creates a new InputManager object.
 *
 * @param endpoint URI where a SPARQL endpoint listens
 * @param inputQuestion the user question in natural language
 * @param parameter map of input parameters, controlling the used methods/algorithms etc.
 * @throws IOException if reading 'config.properties' causes issues
 */
public InputManagerImpl(String endpoint, String inputQuestion, HashMap<String, String> parameter)
        throws IOException {
    // load properties and initiate the logger
    configManager = new ConfigManagerImpl();
    configManager.loadProperties();
    logger = configManager.initLogger();
    logger.info("Start converting NL to SPARQL");

    this.inputQuestion = inputQuestion;
    endpointConnector = new SPARQLEndpointConnectorImpl(endpoint);
    logger.debug("Input query: " + inputQuestion + "\tSPARQL endpoint: " + endpoint);

    // load the parameter list
    setParameter(parameter);
}
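// Illustrative usage sketch (not part of the original class). The endpoint URL, the example
// question, and the concrete parameter values are assumptions for demonstration only; the keys
// ("questionAnalyzer", "resourceMapper", "sparqlGenerator", "NumberOfSparqlCandidates",
// "SparqlLimit") correspond to the options queried below via isActiveOption()/getOption().
//
//     HashMap<String, String> parameter = new HashMap<>();
//     parameter.put("questionAnalyzer", "rulebased");
//     parameter.put("resourceMapper", "luceneStandard");
//     parameter.put("sparqlGenerator", "standard");
//     parameter.put("NumberOfSparqlCandidates", "10");
//     parameter.put("SparqlLimit", "100");
//     InputManagerImpl manager = new InputManagerImpl(
//             "http://example.org/sparql", "Who wrote The Hobbit?", parameter);
//     List<SparqlCandidate> candidates = manager.generateSparql();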
@Override
public List<SparqlCandidate> generateSparql() throws GenerateSparqlException {
    // ------------ Step A --------------------//
    // Preparation A.1: Check if the query is already a valid SPARQL query
    if (isActiveOption("directSparqlPossible", "true")) {
        // User inserted a valid SPARQL query, no further processing necessary
        if (endpointConnector.isValidSparql(inputQuestion)) {
            logger.info("Valid SPARQL query found - no preprocessing");
            sparqlQueries.add(new SparqlCandidate(inputQuestion, 10.0));
            logger.info("New SparqlQuery: '" + inputQuestion + "'");
            return sparqlQueries;
        }
    }

    // ------------ Step B --------------------//
    // Rule-based Question Analysis (like in CASIA)
    // questionType = getQuestionType(taggedInputQuestion);
    // logger.info("Query Type: " + questionType);
    // if ((questionType != QuestionTypes.select_person)
    //         && (questionType != QuestionTypes.select_thing)) {
    //     // throw new GenerateSparqlException("Categorized question as "
    //     //         + questionType + ". Until now only questions "
    //     //         + "asking for people and things are possible.");
    // }

    // ------------ Step C --------------------//
    // Question Analyzing
    List<QueryTriple> queryTriples = new ArrayList<QueryTriple>();
    try {
        // --- C.1 Using ReVerb to split the query into "arg1 rel arg2" format
        if (isActiveOption("questionAnalyzer", "ReVerb")) {
            // Preparation: POS tagging
            if (posTagger == null)
                posTagger = new MaxentTagger(
                        configManager.getHome() + "models/english-left3words-distsim.tagger");
            taggedInputQuestion = posTagger.tagString(inputQuestion);
            logger.info("Stanford POS Tagger returns: " + taggedInputQuestion);

            // replace the question word with a variable (TODO: delete line?)
            String taggedQuestionWithVariable = replaceWHxxWithVariable(taggedInputQuestion);
            String questionWithVariable = replacePOSTags(taggedQuestionWithVariable);
            logger.info("Query before question analysis by ReVerb: '" + questionWithVariable + "'");

            // ReVerb looks on the classpath for the default model files.
            ReVerb reverb = new ReVerbImpl(logger);
            queryTriples = reverb.run(questionWithVariable);
        }

        // --- C.2 Rule-based approach inspired by CASIA
        if (isActiveOption("questionAnalyzer", "rulebased")) {
            RbQuestionAnalyzer analyzer = new RbQuestionAnalyzer();
            queryTriples.addAll(analyzer.getQueryTriples(inputQuestion));
            logger.info("Found triples by Rule-based Question Analyzer: " + queryTriples);
        }
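
        // Illustrative note (assumption, not from the original source): for a preprocessed
        // question such as "VARIABLE wrote The Hobbit", the question analyzers above are
        // expected to yield a QueryTriple roughly of the form
        // (arg1 = "VARIABLE", rel = "wrote", arg2 = "The Hobbit");
        // the clean() calls further below then turn "VARIABLE" into the SPARQL variable "?variable".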

        // --- C.3 Using English Chain Rules to identify entities.
        // Everything between entities which is not a variable (question term)
        // is regarded as a predicate.
        if (isActiveOption("questionAnalyzer", "RdfGroundedString")) {
            RdfGroundedStringAnalyzer analyzer = new RdfGroundedStringAnalyzerImpl();
            queryTripleSet.addAll(analyzer.getQueryTriples(inputQuestion));
            for (ArrayList<QueryTriple> triples : queryTripleSet) {
                logger.info("Found triple sets by RdfGroundedString Question Analyzer: ");
                triples.forEach(triple -> logger.info(triple));
            }
        }

        // //--- C.3: find known entities/relations directly by trying to compare Strings
        // // to the stored labels using Lucene Searcher
        // if (isActiveOption("questionAnalyzer", "StatistcMapper")) {
        //     StatisticMapperImpl statistcMapper = new StatisticMapperImpl(parameter);
        //     queryTriples.addAll(statistcMapper.findCandidates(inputQuestion));
        //     logger.info("Found triples by Statistic Mapper: " + queryTriples);
        // }

        // replace "VARIABLE" with "?variable"
        queryTriples.forEach((queryTriple) -> queryTriple.clean());
        queryTripleSet.forEach(triples -> triples.forEach(triple -> triple.clean()));

        // ----- Step D:
        // --- Entity and Relation Mapping to URIs
        // --- D.1: Using Naive Approach
        if (isActiveOption("resourceMapper", "naive")) {
            NaiveMapper mapper = new NaiveMapperImpl(this);
            queryTripleSet = mapper.getQueryTriples(inputQuestion);
            logger.info("Found URIs by NaiveMapper for '" + inputQuestion + "':");
            for (List<QueryTriple> triples : queryTripleSet) {
                logger.info("Triple Set:");
                for (QueryTriple triple : triples) {
                    logger.info(triple.getTripleWithCandidates());
                }
            }
        }

        // --- D.2: Using standard Lucene
        if (isActiveOption("resourceMapper", "luceneStandard")) {
            LuceneMapper luceneMapper = new LuceneMapper(this);
            if (!queryTriples.isEmpty() && queryTripleSet.isEmpty()) {
                // map elements of the queryTriples list
                for (QueryTriple triple : queryTriples) {
                    luceneMapper.mapQueryTriple(triple);
                }
            } else if (queryTriples.isEmpty() && !queryTripleSet.isEmpty()) {
                for (List<QueryTriple> triplesList : queryTripleSet) {
                    for (QueryTriple triple : triplesList) {
                        luceneMapper.mapQueryTriple(triple);
                    }
                }
            } else if (queryTriples.isEmpty() && queryTripleSet.isEmpty()) {
                logger.error("No available QueryTriples for Lucene Standard Mapper.");
            } else if (!queryTriples.isEmpty() && !queryTripleSet.isEmpty()) {
                logger.error(
                        "Cannot decide which list of QueryTriples should be mapped by Lucene Standard Mapper.");
            }
        }

        // --- D.3: Using RdfGroundedString Mapper for relations and EntityMapper for entities
        if (isActiveOption("resourceMapper", "RdfGroundedString")) {
            if (queryTripleSet.isEmpty()) {
                logger.error("No available QueryTriples for RdfGroundedString Mapper.");
            } else {
                // start finding the relations
                logger.info("");
                logger.info("Mapping predicates using RdfGroundedString Mapper:");
                logger.info("");
                RdfGroundedStringMapper mapper = new RdfGroundedStringMapperImpl();
                for (int i = 0; i < queryTripleSet.size(); i++) {
                    ArrayList<QueryTriple> triples = queryTripleSet.get(i);
                    logger.info("");
                    logger.info("Search relations for QueryTriple Set:");
                    logger.info("");
                    for (QueryTriple triple : triples) {
                        List<RelationCandidate> relationCandidates =
                                mapper.findRelationCandidates(triple.getPredicate());
                        if (relationCandidates != null) {
                            triple.addRelationCandidates(relationCandidates);
                            logger.info("Found relations for triple '" + triple.toString() + "':");
                            for (RelationCandidate relationCandidate : relationCandidates) {
                                logger.info(relationCandidate.toString());
                            }
                        } else {
                            logger.warn("Could not map triple '" + triple.toString()
                                    + "'. Remove query triple set and continue with next set.");
                            queryTripleSet.remove(i);
                            i--;
                            break;
                        }
                    }
                }

                // start mapping the entities
                logger.info("");
                logger.info("Mapping entities using Lucene Mapper:");
                logger.info("");
                LuceneMapper luceneMapper = new LuceneMapper(this);
                for (ArrayList<QueryTriple> triples : queryTripleSet) {
                    logger.info("");
                    logger.info("Search entities for QueryTriple Set:");
                    for (QueryTriple triple : triples) {
                        luceneMapper.mapEntitiesAndClasses(triple);
                    }
                }

                logger.info("Found URIs by RdfGroundedStrings Mapper for '" + inputQuestion + "':");
                for (List<QueryTriple> triples : queryTripleSet) {
                    double score = CommonMethods.getTriplesScore(triples);
                    logger.info("Triple Set (" + score + "):");
                    for (QueryTriple triple : triples) {
                        logger.info(triple.getTripleWithCandidates());
                    }
                }
                queryTripleSet.sort(new TripleSetComparator());
            }
        }

        // ----- Step E:
        // --- Combine entities and relations to possible queries
        if (isActiveOption("sparqlGenerator", "standard")) {
            SparqlGenerator sparqlGenerator = new SparqlGeneratorImpl(this);
            sparqlGenerator.setNumberOfCandidates(
                    Integer.parseInt(getOption("NumberOfSparqlCandidates")));
            sparqlGenerator.setSparqlLimit(Integer.parseInt(getOption("SparqlLimit")));
            if (!queryTriples.isEmpty() && queryTripleSet.isEmpty()) {
                // create SPARQL using queryTriples
                sparqlQueries = sparqlGenerator.getSparqlCanidates(queryTriples);
                for (SparqlCandidate sparqlCandidate : sparqlQueries) {
                    logger.info("Found SPARQL queries by SPARQL Generator: " + sparqlCandidate);
                }
            } else if (queryTriples.isEmpty() && !queryTripleSet.isEmpty()) {
                // create SPARQL using queryTripleSet
                sparqlQueries = sparqlGenerator.getSparqlCanidatesForQueryTripleSet(queryTripleSet);
                for (SparqlCandidate sparqlCandidate : sparqlQueries) {
                    logger.info("Found SPARQL queries by SPARQL Generator: " + sparqlCandidate);
                }
            } else if (queryTriples.isEmpty() && queryTripleSet.isEmpty()) {
                logger.error("No available QueryTriples for SparqlGenerator.");
            } else if (!queryTriples.isEmpty() && !queryTripleSet.isEmpty()) {
                logger.error(
                        "Cannot decide which list of QueryTriples should be used by SparqlGenerator.");
            }
        }

        if (isActiveOption("sparqlGenerator", "selectQueries")) {
            logger.warn("Option 'sparqlGenerator:selectQueries' contains bugs and is not recommended.");
            int numberOfTriplesPerSparql = 2;
            try {
                numberOfTriplesPerSparql = Integer.parseInt(getOption("numberOfTriplesPerSparql"));
            } catch (GenerateSparqlException | NumberFormatException e) {
                logger.warn("Failed to read parameter 'numberOfTriplesPerSparql', using default value 2.");
            }
            SparqlGenerator sparqlGenerator = new SparqlGeneratorImpl(this);
            sparqlGenerator.setNumberOfCandidates(
                    Integer.parseInt(getOption("NumberOfSparqlCandidates")));
            sparqlQueries = sparqlGenerator.getSparqlCanidates(queryTriples, numberOfTriplesPerSparql);
            logger.info("Found SPARQL queries by SPARQL Generator: ");
            for (SparqlCandidate sparql : sparqlQueries) {
                logger.info(sparql);
            }
        }

        // --- Step E.3: the NaiveMapper needs a special kind of treatment
        if (isActiveOption("resourceMapper", "naive")) {
            SparqlGenerator sparqlGenerator = new SparqlGeneratorImpl(this);
            sparqlGenerator.setNumberOfCandidates(
                    Integer.parseInt(getOption("NumberOfSparqlCandidates")));
            sparqlQueries = sparqlGenerator.getSparqlCanidatesForQueryTripleSet(queryTripleSet);
            logger.info("Found SPARQL queries by SPARQL Generator: ");
            for (SparqlCandidate sparql : sparqlQueries) {
                logger.info(sparql);
            }
        }

        return sparqlQueries;
    } catch (ConfidenceFunctionException e) {
logger.error("", e); } catch (IOException e) { logger.error("", e); } return null; }