/**
   * Initializes the list-based NE taggers.
   *
   * <p>Collects every file in <code>listDirectory</code> whose name ends in <code>.lst</code>,
   * sorts the files, and stores for each one its file name in <code>lists</code> and its tagger
   * name ("NE" followed by the file name without the 4-character extension) in <code>listNames
   * </code>. Does nothing if <code>lists</code> is already non-empty.
   *
   * <p>If the directory cannot be listed, an error message is printed and both arrays are left
   * unchanged.
   *
   * @param listDirectory path of the directory the list files are located in
   */
  public static void loadListTaggers(String listDirectory) {
    if (lists.length > 0) return;

    MsgPrinter.printStatusMsg("  ...loading lists");

    File[] listFiles =
        new File(listDirectory)
            .listFiles(
                new FileFilter() {
                  public boolean accept(File pathname) {
                    return pathname.getName().endsWith(".lst");
                  }
                });

    // File.listFiles returns null (not an empty array) when the path does not denote a
    // directory or an I/O error occurs; sorting null would throw a NullPointerException
    if (listFiles == null) {
      MsgPrinter.printErrorMsg("    ...could not read list directory " + listDirectory);
      return;
    }
    Arrays.sort(listFiles);

    ArrayList<String> listsList = new ArrayList<String>(listFiles.length);
    ArrayList<String> listNamesList = new ArrayList<String>(listFiles.length);

    for (File list : listFiles) {
      String fileName = list.getName();
      listsList.add(fileName);

      // strip the ".lst" extension to obtain the NE type name
      String listName = fileName.substring(0, fileName.length() - 4);
      listNamesList.add("NE" + listName);
      MsgPrinter.printStatusMsg("    ...for NE" + listName);
    }

    lists = listsList.toArray(new String[listsList.size()]);
    listNames = listNamesList.toArray(new String[listNamesList.size()]);
  }
示例#2
0
  /**
   * Entry point of Ephyra. Switches on message output and logging, then creates an engine
   * instance and hands control to its command line interface.
   *
   * @param args command line arguments are ignored
   */
  public static void main(String[] args) {
    // status and error messages are both switched on before anything else runs
    MsgPrinter.enableStatusMsgs(true);
    MsgPrinter.enableErrorMsgs(true);

    // logging: choose the log file first, then turn logging on
    Logger.setLogfile("log/OpenEphyra");
    Logger.enableLogging(true);

    // build the engine, then enter the interactive loop
    OpenEphyra ephyra = new OpenEphyra();
    ephyra.commandLine();
  }
示例#3
0
  /**
   * Runs the three pipeline stages (query generation, search, answer selection) in order and
   * returns what answer selection yields for the given limits.
   *
   * @param aq analyzed question
   * @param maxAnswers maximum number of answers, passed on to answer selection
   * @param absThresh absolute threshold for scores, passed on to answer selection
   * @return array of results
   */
  protected Result[] runPipeline(AnalyzedQuestion aq, int maxAnswers, float absThresh) {
    // stage 1: turn the analyzed question into queries
    MsgPrinter.printGeneratingQueries();
    final Query[] generated = QueryGeneration.getQueries(aq);

    // stage 2: run the queries
    MsgPrinter.printSearching();
    final Result[] candidates = Search.doSearch(generated);

    // stage 3: select answers from the search results
    MsgPrinter.printSelectingAnswers();
    return AnswerSelection.getResults(candidates, maxAnswers, absThresh);
  }
示例#4
0
  /**
   * A command line interface for Ephyra.
   *
   * <p>Repeatedly queries the user for a question, asks the system the question and prints out and
   * logs the results.
   *
   * <p>A question may be prefixed with the factoid or list type name followed by a colon (matched
   * case-insensitively) to select the question type; without a prefix the question is treated as
   * a factoid question.
   *
   * <p>The command <code>exit</code> can be used to quit the program. The program also quits if
   * no line could be read.
   */
  public void commandLine() {
    while (true) {
      // query user for question
      MsgPrinter.printQuestionPrompt();
      String question = readLine();

      // readLine() is defined elsewhere; guard against a null result (presumably returned at
      // end of input - TODO confirm) instead of failing on trim() with a NullPointerException
      if (question == null) {
        System.exit(0);
        return; // not reached; makes the null case explicit for the code below
      }

      // quit if user types in "exit"
      question = question.trim();
      if (question.equalsIgnoreCase("exit")) System.exit(0);

      // determine question type and extract question string
      String type;
      if (question.matches("(?i)" + FACTOID + ":.*+")) {
        // factoid question: keep only the text after the first colon
        type = FACTOID;
        question = question.split(":", 2)[1].trim();
      } else if (question.matches("(?i)" + LIST + ":.*+")) {
        // list question: keep only the text after the first colon
        type = LIST;
        question = question.split(":", 2)[1].trim();
      } else {
        // question type unspecified
        type = FACTOID; // default type
      }

      // ask question and log the results
      Result[] results = new Result[0];
      if (type.equals(FACTOID)) {
        Logger.logFactoidStart(question);
        results = askFactoid(question, FACTOID_MAX_ANSWERS, FACTOID_ABS_THRESH);
        Logger.logResults(results);
        Logger.logFactoidEnd();
      } else if (type.equals(LIST)) {
        Logger.logListStart(question);
        results = askList(question, LIST_REL_THRESH);
        Logger.logResults(results);
        Logger.logListEnd();
      }

      // print answers
      MsgPrinter.printAnswers(results);
    }
  }
示例#5
0
  /**
   * Asks Ephyra a factoid question: initializes the factoid pipeline, analyzes the question and
   * runs the pipeline on the analyzed question.
   *
   * @param question factoid question
   * @param maxAnswers maximum number of answers, passed on to the pipeline
   * @param absThresh absolute threshold for scores, passed on to the pipeline
   * @return array of results
   */
  public Result[] askFactoid(String question, int maxAnswers, float absThresh) {
    // the pipeline is set up for factoid questions before the question is analyzed
    initFactoid();

    MsgPrinter.printAnalyzingQuestion();
    final AnalyzedQuestion analyzed = QuestionAnalysis.analyze(question);

    return runPipeline(analyzed, maxAnswers, absThresh);
  }
  /**
   * Creates the OpenNLP name finders and sets the named entity types that are recognized by the
   * finders.
   *
   * <p>One finder is created per file returned for <code>dir</code>; its name is the part of the
   * file name before the first dot. The static finder arrays are only replaced once all models
   * have been read successfully, so a failure leaves the previously loaded finders untouched.
   *
   * @param dir directory containing the models for the name finders
   * @return true, iff the name finders were created successfully
   */
  public static boolean loadNameFinders(String dir) {
    File[] files = FileUtils.getFiles(dir);

    // build into local arrays first: assigning the static fields up front would leave them
    // half-filled with null entries if reading one of the models fails
    NameFinder[] loadedFinders = new NameFinder[files.length];
    String[] loadedNames = new String[files.length];

    try {
      for (int i = 0; i < files.length; i++) {
        MaxentModel model = new SuffixSensitiveGISModelReader(files[i]).getModel();

        loadedFinders[i] = new NameFinder(model);
        loadedNames[i] = files[i].getName().split("\\.")[0];
        MsgPrinter.printStatusMsg("    ...for " + loadedNames[i]);
      }
    } catch (IOException e) {
      // failure is reported to the caller through the return value
      return false;
    }

    // all models loaded: publish the new finders
    finders = loadedFinders;
    finderNames = loadedNames;

    return true;
  }
  /**
   * Initializes the regular expression based NE taggers.
   *
   * <p>Each line of the given file is taken as a pattern name. The name is converted to a field
   * name prefix by upper-casing it and inserting an underscore before every upper case letter
   * (e.g. <code>fooBar</code> becomes <code>FOO_BAR</code>). The public fields <code>PREFIX</code>,
   * <code>PREFIX_PATTERN</code> and <code>PREFIX_MAX_TOKENS</code> of {@link RegExMatcher} are
   * then read reflectively. A pattern whose regular expression contains, but is not equal to,
   * <code>RegExMatcher.NUMBER</code> is registered as a quantity pattern if the additional fields
   * <code>PREFIX_UNIT_PATTERN</code> and <code>PREFIX_UNIT_MAX_TOKENS</code> can be read; otherwise
   * it is registered as a plain pattern. Names whose fields cannot be read are reported with an
   * error message and skipped.
   *
   * <p>Does nothing if patterns have already been loaded.
   *
   * @param regExListFileName path and name of the file the names of the patterns in use are found
   *     in
   */
  public static void loadRegExTaggers(String regExListFileName) {
    if (patterns.length > 0) return;

    MsgPrinter.printStatusMsg("  ...loading patterns");

    ArrayList<String> patternNameList = new ArrayList<String>();
    ArrayList<Pattern> patternList = new ArrayList<Pattern>();
    ArrayList<Integer> patternMaxTokensList = new ArrayList<Integer>();

    // for quantity patterns only the unit's maximum token count is kept; the maximum token
    // count of the quantity pattern itself is not stored
    ArrayList<String> quantityPatternNameList = new ArrayList<String>();
    ArrayList<Pattern> quantityPatternList = new ArrayList<Pattern>();
    ArrayList<Pattern> quantityUnitPatternList = new ArrayList<Pattern>();
    ArrayList<Integer> quantityUnitPatternMaxTokensList = new ArrayList<Integer>();

    BufferedReader br = null;
    try {
      br = new BufferedReader(new FileReader(regExListFileName));

      String line;
      while ((line = br.readLine()) != null) {
        String neName = "NE" + line;

        // camel case pattern name -> upper case field name prefix with underscores
        StringBuilder prefixBuilder = new StringBuilder();
        for (int c = 0; c < line.length(); c++) {
          char ch = line.charAt(c);
          if (Character.isUpperCase(ch)) prefixBuilder.append('_').append(ch);
          else prefixBuilder.append(Character.toUpperCase(ch));
        }
        String patternFieldNamePrefix = prefixBuilder.toString();

        String regExFieldName = patternFieldNamePrefix;
        String patternFieldName = patternFieldNamePrefix + "_PATTERN";
        String maxTokensFieldName = patternFieldNamePrefix + "_MAX_TOKENS";

        try {
          Field regExField = RegExMatcher.class.getField(regExFieldName);
          Field patternField = RegExMatcher.class.getField(patternFieldName);
          Field maxTokensField = RegExMatcher.class.getField(maxTokensFieldName);

          // the fields are read with a null receiver, i.e. they must be static
          String regEx = regExField.get(null).toString();
          Pattern pattern = ((Pattern) patternField.get(null));
          int maxTokens = maxTokensField.getInt(null);

          // a quantity is a number with something around it, not the bare number pattern
          boolean isQuantity =
              ((regEx.indexOf(RegExMatcher.NUMBER) != -1) && !regEx.equals(RegExMatcher.NUMBER));

          if (isQuantity) {
            try {
              String unitPatternFieldName = patternFieldNamePrefix + "_UNIT_PATTERN";
              String unitMaxTokensFieldName = patternFieldNamePrefix + "_UNIT_MAX_TOKENS";

              Field unitPatternField = RegExMatcher.class.getField(unitPatternFieldName);
              Field unitMaxTokensField = RegExMatcher.class.getField(unitMaxTokensFieldName);

              Pattern unitPattern = ((Pattern) unitPatternField.get(null));
              int unitMaxTokens = unitMaxTokensField.getInt(null);

              // all reflective reads succeeded, so the lists below stay aligned by index
              quantityPatternNameList.add(neName);
              quantityPatternList.add(pattern);
              quantityUnitPatternList.add(unitPattern);
              quantityUnitPatternMaxTokensList.add(Integer.valueOf(unitMaxTokens));
            } catch (Exception e) {
              // no usable unit fields: fall back to treating it as a plain pattern
              isQuantity = false;
            }
          }

          if (!isQuantity) {
            patternNameList.add(neName);
            patternList.add(pattern);
            patternMaxTokensList.add(Integer.valueOf(maxTokens));
          }

          MsgPrinter.printStatusMsg("    ...for " + neName);
        } catch (Exception e) {
          MsgPrinter.printErrorMsg("    ...could not add " + neName);
        }
      }

      // copy the plain patterns into the static arrays
      patternNames = patternNameList.toArray(new String[patternNameList.size()]);
      patterns = patternList.toArray(new Pattern[patternList.size()]);
      patternMaxTokens = new int[patternMaxTokensList.size()];
      for (int p = 0; p < patternMaxTokens.length; p++) {
        patternMaxTokens[p] = patternMaxTokensList.get(p).intValue();
      }

      // copy the quantity patterns into the static arrays
      quantityPatternNames =
          quantityPatternNameList.toArray(new String[quantityPatternNameList.size()]);
      quantityPatterns = quantityPatternList.toArray(new Pattern[quantityPatternList.size()]);
      quantityUnitPatterns =
          quantityUnitPatternList.toArray(new Pattern[quantityUnitPatternList.size()]);
      quantityUnitPatternMaxTokens = new int[quantityUnitPatternMaxTokensList.size()];
      for (int p = 0; p < quantityUnitPatternMaxTokens.length; p++) {
        quantityUnitPatternMaxTokens[p] = quantityUnitPatternMaxTokensList.get(p).intValue();
      }
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      // always release the file handle, whether or not reading succeeded
      if (br != null) {
        try {
          br.close();
        } catch (IOException closeFailure) {
          closeFailure.printStackTrace();
        }
      }
    }

    // all names: plain patterns, then the generic number type, then the quantity patterns
    allPatternNames = new String[patterns.length + 1 + quantityUnitPatterns.length];
    System.arraycopy(patternNames, 0, allPatternNames, 0, patternNames.length);
    allPatternNames[patternNames.length] = "NEnumber";
    System.arraycopy(
        quantityPatternNames,
        0,
        allPatternNames,
        patternNames.length + 1,
        quantityPatternNames.length);
  }
示例#8
0
  /**
   * Creates a new instance of Ephyra and initializes the system.
   *
   * <p>For use as an API.
   *
   * <p>Creates the NLP components (tokenizer, sentence detectors, stemmer, POS tagger, chunker,
   * syntactic parser, NE taggers), the WordNet dictionary, and loads the word indices, query
   * reformulators and question/answer patterns. A component that fails to initialize is reported
   * with an error message; initialization then continues with the next component.
   *
   * @param dir directory of Ephyra; resource paths are formed by appending <code>"res/..."</code>
   *     directly to this string, so it has to be empty or end with a path separator
   */
  public OpenEphyra(String dir) {
    this.dir = dir;

    MsgPrinter.printInitializing();

    // create tokenizer
    MsgPrinter.printStatusMsg("Creating tokenizer...");
    if (!OpenNLP.createTokenizer(dir + "res/nlp/tokenizer/opennlp/EnglishTok.bin.gz"))
      MsgPrinter.printErrorMsg("Could not create tokenizer.");
    //		LingPipe.createTokenizer();

    // create sentence detector (both the OpenNLP and the LingPipe one)
    MsgPrinter.printStatusMsg("Creating sentence detector...");
    if (!OpenNLP.createSentenceDetector(dir + "res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz"))
      MsgPrinter.printErrorMsg("Could not create sentence detector.");
    LingPipe.createSentenceDetector();

    // create stemmer
    MsgPrinter.printStatusMsg("Creating stemmer...");
    SnowballStemmer.create();

    // create part of speech tagger
    MsgPrinter.printStatusMsg("Creating POS tagger...");
    if (!OpenNLP.createPosTagger(
        dir + "res/nlp/postagger/opennlp/tag.bin.gz", dir + "res/nlp/postagger/opennlp/tagdict"))
      MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
    //		if (!StanfordPosTagger.init(dir + "res/nlp/postagger/stanford/" +
    //				"wsj3t0-18-bidirectional/train-wsj-0-18.holder"))
    //			MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");

    // create chunker
    MsgPrinter.printStatusMsg("Creating chunker...");
    if (!OpenNLP.createChunker(dir + "res/nlp/phrasechunker/opennlp/EnglishChunk.bin.gz"))
      MsgPrinter.printErrorMsg("Could not create chunker.");

    // create syntactic parser (the Stanford parser is used; the OpenNLP parser is disabled)
    MsgPrinter.printStatusMsg("Creating syntactic parser...");
    //		if (!OpenNLP.createParser(dir + "res/nlp/syntacticparser/opennlp/"))
    //			MsgPrinter.printErrorMsg("Could not create OpenNLP parser.");
    try {
      StanfordParser.initialize();
    } catch (Exception e) {
      MsgPrinter.printErrorMsg("Could not create Stanford parser.");
    }

    // create named entity taggers (list-based, regular expression based and model-based)
    MsgPrinter.printStatusMsg("Creating NE taggers...");
    NETagger.loadListTaggers(dir + "res/nlp/netagger/lists/");
    NETagger.loadRegExTaggers(dir + "res/nlp/netagger/patterns.lst");
    MsgPrinter.printStatusMsg("  ...loading models");
    //		if (!NETagger.loadNameFinders(dir + "res/nlp/netagger/opennlp/"))
    //			MsgPrinter.printErrorMsg("Could not create OpenNLP NE tagger.");
    // the Stanford NE tagger is only initialized if that has not been done before
    if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init())
      MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
    MsgPrinter.printStatusMsg("  ...done");

    // create linker
    //		MsgPrinter.printStatusMsg("Creating linker...");
    //		if (!OpenNLP.createLinker(dir + "res/nlp/corefresolver/opennlp/"))
    //			MsgPrinter.printErrorMsg("Could not create linker.");

    // create WordNet dictionary
    MsgPrinter.printStatusMsg("Creating WordNet dictionary...");
    if (!WordNet.initialize(dir + "res/ontologies/wordnet/file_properties.xml"))
      MsgPrinter.printErrorMsg("Could not create WordNet dictionary.");

    // load function words (numbers are excluded)
    MsgPrinter.printStatusMsg("Loading function words...");
    if (!FunctionWords.loadIndex(dir + "res/indices/functionwords_nonumbers"))
      MsgPrinter.printErrorMsg("Could not load function words.");

    // load prepositions
    MsgPrinter.printStatusMsg("Loading prepositions...");
    if (!Prepositions.loadIndex(dir + "res/indices/prepositions"))
      MsgPrinter.printErrorMsg("Could not load prepositions.");

    // load irregular verbs
    MsgPrinter.printStatusMsg("Loading irregular verbs...");
    if (!IrregularVerbs.loadVerbs(dir + "res/indices/irregularverbs"))
      MsgPrinter.printErrorMsg("Could not load irregular verbs.");

    // load word frequencies
    MsgPrinter.printStatusMsg("Loading word frequencies...");
    if (!WordFrequencies.loadIndex(dir + "res/indices/wordfrequencies"))
      MsgPrinter.printErrorMsg("Could not load word frequencies.");

    // load query reformulators
    MsgPrinter.printStatusMsg("Loading query reformulators...");
    if (!QuestionReformulationG.loadReformulators(dir + "res/reformulations/"))
      MsgPrinter.printErrorMsg("Could not load query reformulators.");

    // load answer types
    //		MsgPrinter.printStatusMsg("Loading answer types...");
    //		if (!AnswerTypeTester.loadAnswerTypes(dir +
    //				"res/answertypes/patterns/answertypepatterns"))
    //			MsgPrinter.printErrorMsg("Could not load answer types.");

    // load question patterns
    MsgPrinter.printStatusMsg("Loading question patterns...");
    if (!QuestionInterpreter.loadPatterns(dir + "res/patternlearning/questionpatterns/"))
      MsgPrinter.printErrorMsg("Could not load question patterns.");

    // load answer patterns
    MsgPrinter.printStatusMsg("Loading answer patterns...");
    if (!AnswerPatternFilter.loadPatterns(dir + "res/patternlearning/answerpatterns/"))
      MsgPrinter.printErrorMsg("Could not load answer patterns.");
  }