Пример #1
0
  /**
   * Sets up an input manager for converting one natural-language question into SPARQL.
   *
   * <p>The configuration is loaded first because the logger is obtained from the configuration
   * manager. Afterwards the question is stored, a connector for the given endpoint is created and
   * the parameter map is handed to {@code setParameter}.
   *
   * @param endpoint uri where a SPARQL endpoint listens
   * @param inputQuestion the user question in natural language
   * @param parameter map of input parameters, controlling used methods/algorithms etc.
   * @throws IOException if one of the setup steps reports an I/O problem (presumably while
   *     reading the configuration properties; confirm against {@code ConfigManagerImpl})
   */
  public InputManagerImpl(String endpoint, String inputQuestion, HashMap<String, String> parameter)
      throws IOException {
    // Configuration comes first: the logger used below is created by the config manager.
    this.configManager = new ConfigManagerImpl();
    this.configManager.loadProperties();
    this.logger = this.configManager.initLogger();
    this.logger.info("Start converting NL to SPARQL");

    // Remember the question and open the connection to the SPARQL endpoint.
    this.inputQuestion = inputQuestion;
    this.endpointConnector = new SPARQLEndpointConnectorImpl(endpoint);
    this.logger.debug("Input query: " + inputQuestion + "\tSPARQL endpoint: " + endpoint);

    // Apply the caller-supplied options last, once logging is available.
    setParameter(parameter);
  }
Пример #2
0
  /**
   * Converts the stored input question into a list of SPARQL candidates.
   *
   * <p>Which steps run is decided by the options queried through {@code isActiveOption}:
   *
   * <ul>
   *   <li><b>Step A</b> - with {@code directSparqlPossible=true}, an input that the endpoint
   *       connector accepts as valid SPARQL is added with score 10.0 and returned immediately.
   *   <li><b>Step B</b> - rule-based question-type analysis; currently disabled.
   *   <li><b>Step C</b> - question analysis ({@code questionAnalyzer}: ReVerb, rulebased,
   *       RdfGroundedString) producing query triples and/or query triple sets.
   *   <li><b>Step D</b> - mapping to URIs ({@code resourceMapper}: naive, luceneStandard,
   *       RdfGroundedString).
   *   <li><b>Step E</b> - SPARQL generation ({@code sparqlGenerator}: standard, selectQueries; the
   *       naive resource mapper additionally gets its own generation pass).
   * </ul>
   *
   * @return the content of {@code sparqlQueries} after the pipeline ran, or {@code null} if a
   *     {@code ConfidenceFunctionException} or {@code IOException} was caught (the exception is
   *     logged); the {@code null} result is kept for compatibility with existing callers
   * @throws GenerateSparqlException propagated unchanged from option lookup or a pipeline step
   */
  @Override
  public List<SparqlCandidate> generateSparql() throws GenerateSparqlException {

    // ------------ Step A --------------------//
    // The user may have entered a valid SPARQL query; then no further processing is necessary.
    if (isActiveOption("directSparqlPossible", "true")
        && endpointConnector.isValidSparql(inputQuestion)) {
      logger.info("valid SPARQL query found - no preprocessing");
      sparqlQueries.add(new SparqlCandidate(inputQuestion, 10.0));
      logger.info("New SparqlQuery: '" + inputQuestion + "'");
      return sparqlQueries;
    }

    // ------------ Step B --------------------//
    // Rule-based question-type analysis (like in CASIA) is currently disabled.

    try {
      // ------------ Step C: question analysis --------------------//
      List<QueryTriple> queryTriples = analyzeQuestionIntoTriples();

      // ------------ Step D: entity and relation mapping to URIs --//
      if (isActiveOption("resourceMapper", "naive")) {
        mapWithNaiveMapper();
      }
      if (isActiveOption("resourceMapper", "luceneStandard")) {
        mapWithLuceneStandard(queryTriples);
      }
      if (isActiveOption("resourceMapper", "RdfGroundedString")) {
        mapWithRdfGroundedString();
      }

      // ------------ Step E: combine entities and relations -------//
      if (isActiveOption("sparqlGenerator", "standard")) {
        generateWithStandardGenerator(queryTriples);
      }
      if (isActiveOption("sparqlGenerator", "selectQueries")) {
        generateWithSelectQueries(queryTriples);
      }
      // E.3: the NaiveMapper needs a special kind of treatment
      if (isActiveOption("resourceMapper", "naive")) {
        generateForNaiveMapper();
      }

      return sparqlQueries;

    } catch (ConfidenceFunctionException e) {
      logger.error("Failed to generate SPARQL candidates for '" + inputQuestion + "'", e);
    } catch (IOException e) {
      logger.error("I/O error while generating SPARQL candidates for '" + inputQuestion + "'", e);
    }

    // NOTE(review): callers receive null after a logged failure; kept for compatibility.
    return null;
  }

  /**
   * Step C: runs every active question analyzer and cleans the resulting triples.
   *
   * <p>ReVerb replaces the triple list, the rule-based analyzer appends to it, and the
   * RdfGroundedString analyzer appends to the {@code queryTripleSet} field instead.
   *
   * @return the triples found by the ReVerb and/or rule-based analyzer (empty if neither is active)
   */
  private List<QueryTriple> analyzeQuestionIntoTriples()
      throws ConfidenceFunctionException, IOException, GenerateSparqlException {
    List<QueryTriple> queryTriples = new ArrayList<QueryTriple>();

    // --- C.1 ReVerb splits the question into "arg1 rel arg2" format
    if (isActiveOption("questionAnalyzer", "ReVerb")) {
      queryTriples = analyzeWithReVerb();
    }

    // --- C.2 Rule-based approach inspired by CASIA
    if (isActiveOption("questionAnalyzer", "rulebased")) {
      RbQuestionAnalyzer analyzer = new RbQuestionAnalyzer();
      queryTriples.addAll(analyzer.getQueryTriples(inputQuestion));
      logger.info("Found triples by Rule-based Question Analyzer: " + queryTriples);
    }

    // --- C.3 Using English Chain Rules to identify entities. Everything between entities
    // which is not a variable (question term) is regarded as a predicate.
    if (isActiveOption("questionAnalyzer", "RdfGroundedString")) {
      RdfGroundedStringAnalyzer analyzer = new RdfGroundedStringAnalyzerImpl();
      queryTripleSet.addAll(analyzer.getQueryTriples(inputQuestion));
      for (List<QueryTriple> triples : queryTripleSet) {
        logger.info("Found triple sets by RdfGroundedString Question Analyzer: ");
        triples.forEach(triple -> logger.info(triple));
      }
    }

    // (A Lucene-based statistic mapper used to be a further analyzer here; it is disabled.)

    // replace "VARIABLE" with "?variable"
    queryTriples.forEach(triple -> triple.clean());
    queryTripleSet.forEach(triples -> triples.forEach(triple -> triple.clean()));
    return queryTriples;
  }

  /**
   * Step C.1: POS-tags the question, substitutes the question word by a variable, strips the tags
   * again and lets ReVerb extract the triples.
   *
   * @return whatever {@code ReVerb.run} returns for the prepared question
   */
  private List<QueryTriple> analyzeWithReVerb()
      throws ConfidenceFunctionException, IOException, GenerateSparqlException {
    // The tagger model is loaded lazily and reused on later calls.
    if (posTagger == null) {
      posTagger =
          new MaxentTagger(configManager.getHome() + "models/english-left3words-distsim.tagger");
    }
    taggedInputQuestion = posTagger.tagString(inputQuestion);
    logger.info("Stanford POS Tagger returns: " + taggedInputQuestion);

    // replace question word with variable TODO: delete line?
    String taggedQuestionWithVariable = replaceWHxxWithVariable(taggedInputQuestion);
    String questionWithVariable = replacePOSTags(taggedQuestionWithVariable);
    logger.info("Query before question analysis by ReVerb: '" + questionWithVariable + "'");

    // Looks on the classpath for the default model files.
    ReVerb reverb = new ReVerbImpl(logger);
    return reverb.run(questionWithVariable);
  }

  /** Step D.1: replaces {@code queryTripleSet} by the naive mapper's result and logs it. */
  private void mapWithNaiveMapper()
      throws ConfidenceFunctionException, IOException, GenerateSparqlException {
    NaiveMapper mapper = new NaiveMapperImpl(this);
    queryTripleSet = mapper.getQueryTriples(inputQuestion);

    logger.info("Found URIs by NaiveMapper for '" + inputQuestion + "':");
    for (List<QueryTriple> triples : queryTripleSet) {
      logger.info("Triple Set:");
      for (QueryTriple triple : triples) {
        logger.info(triple.getTripleWithCandidates());
      }
    }
  }

  /**
   * Step D.2: maps either the plain triple list or the triple sets with the standard Lucene
   * mapper. Exactly one of the two sources must be non-empty; otherwise an error is logged and
   * nothing is mapped.
   *
   * @param queryTriples the triples produced by Step C
   */
  private void mapWithLuceneStandard(List<QueryTriple> queryTriples)
      throws ConfidenceFunctionException, IOException, GenerateSparqlException {
    LuceneMapper luceneMapper = new LuceneMapper(this);
    boolean hasTriples = !queryTriples.isEmpty();
    boolean hasTripleSets = !queryTripleSet.isEmpty();

    if (hasTriples && hasTripleSets) {
      logger.error(
          "Can not decide which list of QueryTriples should be mapped by Lucene Standard Mapper.");
      return;
    }
    if (!hasTriples && !hasTripleSets) {
      logger.error("No available QueryTriples for Lucene Standard Mapper.");
      return;
    }

    if (hasTriples) {
      // map elements of the queryTriples list
      for (QueryTriple triple : queryTriples) {
        luceneMapper.mapQueryTriple(triple);
      }
    } else {
      for (List<QueryTriple> triples : queryTripleSet) {
        for (QueryTriple triple : triples) {
          luceneMapper.mapQueryTriple(triple);
        }
      }
    }
  }

  /**
   * Step D.3: maps predicates with the RdfGroundedString mapper and entities with the Lucene
   * mapper, logs the scored result and sorts {@code queryTripleSet} with a
   * {@code TripleSetComparator}. Triple sets containing an unmappable predicate are removed.
   */
  private void mapWithRdfGroundedString()
      throws ConfidenceFunctionException, IOException, GenerateSparqlException {
    if (queryTripleSet.isEmpty()) {
      logger.error("No available QueryTriples for RdfGroundedString Mapper.");
      return;
    }

    // start finding the relations
    logger.info("");
    logger.info("Mapping predicates using RdfGroundedString Mapper:");
    logger.info("");

    RdfGroundedStringMapper mapper = new RdfGroundedStringMapperImpl();
    int index = 0;
    while (index < queryTripleSet.size()) {
      if (mapRelationCandidates(mapper, queryTripleSet.get(index))) {
        index++;
      } else {
        // The next set slides into this position, so the index must not advance.
        queryTripleSet.remove(index);
      }
    }

    // start mapping the entities
    logger.info("");
    logger.info("Mapping entities using Lucene Mapper:");
    logger.info("");

    LuceneMapper luceneMapper = new LuceneMapper(this);
    for (List<QueryTriple> triples : queryTripleSet) {
      logger.info("");
      logger.info("Search entities for QueryTriple Set:");
      for (QueryTriple triple : triples) {
        luceneMapper.mapEntitiesAndClasses(triple);
      }
    }

    logger.info("Found URIs by RdfGroundedStrings Mapper for '" + inputQuestion + "':");
    for (List<QueryTriple> triples : queryTripleSet) {
      double score = CommonMethods.getTriplesScore(triples);
      logger.info("Triple Set (" + score + "):");
      for (QueryTriple triple : triples) {
        logger.info(triple.getTripleWithCandidates());
      }
    }

    queryTripleSet.sort(new TripleSetComparator());
  }

  /**
   * Looks up relation candidates for the predicate of every triple in one triple set.
   *
   * @param mapper the relation mapper to query
   * @param triples one triple set; candidates are attached to its triples in order
   * @return {@code true} if every predicate was mapped; {@code false} as soon as the mapper
   *     returns {@code null} for one triple (remaining triples of the set are not processed)
   */
  private boolean mapRelationCandidates(RdfGroundedStringMapper mapper, List<QueryTriple> triples)
      throws ConfidenceFunctionException, IOException, GenerateSparqlException {
    logger.info("");
    logger.info("Search relations for QueryTriple Set:");
    logger.info("");

    for (QueryTriple triple : triples) {
      List<RelationCandidate> relationCandidates =
          mapper.findRelationCandidates(triple.getPredicate());

      if (relationCandidates == null) {
        logger.warn(
            "Could not map triple '"
                + triple.toString()
                + "'. Remove query triple set and continue with next set.");
        return false;
      }

      triple.addRelationCandidates(relationCandidates);
      logger.info("Found relations for triple '" + triple.toString() + "':");
      for (RelationCandidate relationCandidate : relationCandidates) {
        logger.info(relationCandidate.toString());
      }
    }
    return true;
  }

  /**
   * Creates a SPARQL generator limited to the number of candidates configured by option
   * {@code NumberOfSparqlCandidates}.
   */
  private SparqlGenerator newSparqlGenerator()
      throws ConfidenceFunctionException, IOException, GenerateSparqlException {
    SparqlGenerator generator = new SparqlGeneratorImpl(this);
    generator.setNumberOfCandidates(Integer.parseInt(getOption("NumberOfSparqlCandidates")));
    return generator;
  }

  /**
   * Step E.1: standard generation from either the plain triple list or the triple sets. Exactly
   * one of the two sources must be non-empty; otherwise an error is logged and
   * {@code sparqlQueries} is left untouched.
   *
   * @param queryTriples the triples produced by Step C
   */
  private void generateWithStandardGenerator(List<QueryTriple> queryTriples)
      throws ConfidenceFunctionException, IOException, GenerateSparqlException {
    SparqlGenerator generator = newSparqlGenerator();
    generator.setSparqlLimit(Integer.parseInt(getOption("SparqlLimit")));

    boolean hasTriples = !queryTriples.isEmpty();
    boolean hasTripleSets = !queryTripleSet.isEmpty();

    if (hasTriples && hasTripleSets) {
      logger.error("Can not decide which list of QueryTriples should be used by SparqlGenerator.");
      return;
    }
    if (!hasTriples && !hasTripleSets) {
      logger.error("No available QueryTriples for SparqlGenerator.");
      return;
    }

    if (hasTriples) {
      // create SPARQL using queryTriples
      sparqlQueries = generator.getSparqlCanidates(queryTriples);
    } else {
      // create SPARQL using queryTripleSet
      sparqlQueries = generator.getSparqlCanidatesForQueryTripleSet(queryTripleSet);
    }

    for (SparqlCandidate sparqlCandidate : sparqlQueries) {
      logger.info("Found SPARQL queries by SPARQL Generator: " + sparqlCandidate);
    }
  }

  /**
   * Step E.2: generation with a fixed number of triples per query, read from option
   * {@code numberOfTriplesPerSparql} (default 2 if the option is missing or not a number).
   *
   * @param queryTriples the triples produced by Step C
   */
  private void generateWithSelectQueries(List<QueryTriple> queryTriples)
      throws ConfidenceFunctionException, IOException, GenerateSparqlException {
    logger.warn("Option 'sparqlGenerator:selectQueries' contains bugs and is not recommended.");

    int numberOfTriplesPerSparql = 2;
    try {
      numberOfTriplesPerSparql = Integer.parseInt(getOption("numberOfTriplesPerSparql"));
    } catch (GenerateSparqlException | NumberFormatException e) {
      logger.warn("Failed to read parameter 'numberOfTriplesPerSparql', using default value 2 .");
    }

    SparqlGenerator generator = newSparqlGenerator();
    sparqlQueries = generator.getSparqlCanidates(queryTriples, numberOfTriplesPerSparql);
    logSparqlCandidates();
  }

  /** Step E.3: generation from the triple sets produced by the naive resource mapper. */
  private void generateForNaiveMapper()
      throws ConfidenceFunctionException, IOException, GenerateSparqlException {
    SparqlGenerator generator = newSparqlGenerator();
    sparqlQueries = generator.getSparqlCanidatesForQueryTripleSet(queryTripleSet);
    logSparqlCandidates();
  }

  /** Logs a header line followed by every candidate currently held in {@code sparqlQueries}. */
  private void logSparqlCandidates() {
    logger.info("Found SPARQL queries by SPARQL Generator: ");
    for (SparqlCandidate sparql : sparqlQueries) {
      logger.info(sparql);
    }
  }