/**
   * Initializes the regular expression based NE taggers.
   *
   * <p>The list file is read line by line; every line is taken verbatim as a pattern name. The
   * name is converted from camel case to upper case with underscores (a line {@code fooBar} yields
   * {@code FOO_BAR}) and the public static fields {@code FOO_BAR}, {@code FOO_BAR_PATTERN} and
   * {@code FOO_BAR_MAX_TOKENS} of {@code RegExMatcher} are looked up reflectively. A pattern whose
   * regular expression contains, but is not equal to, {@code RegExMatcher.NUMBER} is registered as
   * a quantity pattern if the additional fields {@code FOO_BAR_UNIT_PATTERN} and {@code
   * FOO_BAR_UNIT_MAX_TOKENS} exist; otherwise it is registered as a plain pattern.
   *
   * <p>The method returns immediately if patterns have already been loaded. A name whose fields
   * cannot be resolved is reported through {@code MsgPrinter.printErrorMsg} and skipped. If the
   * list file cannot be read, the stack trace is printed and the pattern arrays keep their
   * previous contents.
   *
   * @param regExListFileName path and name of the file the names of the patterns in use are found
   *     in
   */
  public static void loadRegExTaggers(String regExListFileName) {
    // nothing to do if an earlier call already loaded the patterns
    if (patterns.length > 0) return;

    MsgPrinter.printStatusMsg("  ...loading patterns");

    ArrayList<String> patternNameList = new ArrayList<String>();
    ArrayList<Pattern> patternList = new ArrayList<Pattern>();
    ArrayList<Integer> patternMaxTokensList = new ArrayList<Integer>();

    ArrayList<String> quantityPatternNameList = new ArrayList<String>();
    ArrayList<Pattern> quantityPatternList = new ArrayList<Pattern>();
    ArrayList<Integer> quantityPatternMaxTokensList = new ArrayList<Integer>();

    ArrayList<String> quantityUnitPatternNameList = new ArrayList<String>();
    ArrayList<Pattern> quantityUnitPatternList = new ArrayList<Pattern>();
    ArrayList<Integer> quantityUnitPatternMaxTokensList = new ArrayList<Integer>();

    try {
      BufferedReader br = new BufferedReader(new FileReader(regExListFileName));
      try {
        String line;
        while ((line = br.readLine()) != null) {
          String neName = "NE" + line;

          // camel case -> upper case with underscores, e.g. "fooBar" -> "FOO_BAR"
          StringBuilder prefixBuilder = new StringBuilder();
          for (int c = 0; c < line.length(); c++) {
            char ch = line.charAt(c);
            if (Character.isUpperCase(ch)) prefixBuilder.append('_').append(ch);
            else prefixBuilder.append(Character.toUpperCase(ch));
          }
          String patternFieldNamePrefix = prefixBuilder.toString();

          String regExFieldName = patternFieldNamePrefix;
          String patternFieldName = patternFieldNamePrefix + "_PATTERN";
          String maxTokensFieldName = patternFieldNamePrefix + "_MAX_TOKENS";

          try {
            Field regExField = RegExMatcher.class.getField(regExFieldName);
            Field patternField = RegExMatcher.class.getField(patternFieldName);
            Field maxTokensField = RegExMatcher.class.getField(maxTokensFieldName);

            // the fields are read with a null receiver, i.e. they are expected to be static
            String regEx = regExField.get(null).toString();
            Pattern pattern = ((Pattern) patternField.get(null));
            int maxTokens = maxTokensField.getInt(null);

            // a quantity embeds the NUMBER expression but is more than just a number
            boolean isQuantity =
                ((regEx.indexOf(RegExMatcher.NUMBER) != -1) && !regEx.equals(RegExMatcher.NUMBER));

            if (isQuantity) {
              try {
                String unitPatternFieldName = patternFieldNamePrefix + "_UNIT_PATTERN";
                String unitMaxTokensFieldName = patternFieldNamePrefix + "_UNIT_MAX_TOKENS";

                Field unitPatternField = RegExMatcher.class.getField(unitPatternFieldName);
                Field unitMaxTokensField = RegExMatcher.class.getField(unitMaxTokensFieldName);

                Pattern unitPattern = ((Pattern) unitPatternField.get(null));
                int unitMaxTokens = unitMaxTokensField.getInt(null);

                // all lookups succeeded, so the parallel lists stay in sync
                quantityPatternNameList.add(neName);
                quantityPatternList.add(pattern);
                quantityPatternMaxTokensList.add(Integer.valueOf(maxTokens));

                quantityUnitPatternNameList.add(neName);
                quantityUnitPatternList.add(unitPattern);
                quantityUnitPatternMaxTokensList.add(Integer.valueOf(unitMaxTokens));
              } catch (Exception e) {
                // no usable unit fields: fall back to treating this as a plain pattern
                isQuantity = false;
              }
            }

            if (!isQuantity) {
              patternNameList.add(neName);
              patternList.add(pattern);
              patternMaxTokensList.add(Integer.valueOf(maxTokens));
            }

            MsgPrinter.printStatusMsg("    ...for " + neName);
          } catch (Exception e) {
            MsgPrinter.printErrorMsg("    ...could not add " + neName);
          }
        }

        // publish the collected patterns only after the whole list was read successfully
        patternNames = patternNameList.toArray(new String[patternNameList.size()]);
        patterns = patternList.toArray(new Pattern[patternList.size()]);
        patternMaxTokens = new int[patternMaxTokensList.size()];
        for (int p = 0; p < patternMaxTokens.length; p++) {
          patternMaxTokens[p] = patternMaxTokensList.get(p).intValue();
        }

        quantityPatternNames =
            quantityPatternNameList.toArray(new String[quantityPatternNameList.size()]);
        quantityPatterns = quantityPatternList.toArray(new Pattern[quantityPatternList.size()]);
        quantityUnitPatterns =
            quantityUnitPatternList.toArray(new Pattern[quantityUnitPatternList.size()]);
        //			quantityPatternMaxTokens = new int[quantityPatternMaxTokensList.size()];
        quantityUnitPatternMaxTokens = new int[quantityUnitPatternMaxTokensList.size()];
        for (int p = 0; p < quantityUnitPatternMaxTokens.length; p++) {
          //				quantityPatternMaxTokens[p] = quantityPatternMaxTokensList.get(p);
          quantityUnitPatternMaxTokens[p] = quantityUnitPatternMaxTokensList.get(p).intValue();
        }
      } finally {
        // release the file handle on success and on failure alike
        br.close();
      }
    } catch (IOException e) {
      e.printStackTrace();
    }

    // plain pattern names, then the fixed "NEnumber" entry, then the quantity pattern names
    allPatternNames = new String[patterns.length + 1 + quantityUnitPatterns.length];
    System.arraycopy(patternNames, 0, allPatternNames, 0, patternNames.length);
    allPatternNames[patternNames.length] = "NEnumber";
    System.arraycopy(
        quantityPatternNames,
        0,
        allPatternNames,
        patternNames.length + 1,
        quantityPatternNames.length);
  }
Example #2
0
 /**
  * Prints the given error message with the value of {@code getTimestamp()} appended in
  * parentheses.
  *
  * <p>Nothing is printed, and no timestamp is requested, when the {@code errorMsgs} flag is off.
  *
  * @param error an error message
  */
 public static synchronized void printErrorMsgTimestamp(String error) {
   if (!errorMsgs) {
     return;
   }

   String stampedError = error + " (" + getTimestamp() + ")";
   printErrorMsg(stampedError);
 }
Example #3
0
  /**
   * Creates a new instance of Ephyra and initializes the system.
   *
   * <p>For use as an API.
   *
   * <p>The NLP components and resources are initialized one after another in a fixed order. A
   * component that fails to initialize is reported through {@code MsgPrinter.printErrorMsg} and
   * initialization continues with the next one; the constructor itself does not fail.
   *
   * @param dir directory of Ephyra; it is concatenated directly with relative resource paths such
   *     as {@code "res/nlp/..."}, so it has to end with a path separator (or be empty)
   */
  public OpenEphyra(String dir) {
    this.dir = dir;

    MsgPrinter.printInitializing();

    // create tokenizer
    MsgPrinter.printStatusMsg("Creating tokenizer...");
    if (!OpenNLP.createTokenizer(dir + "res/nlp/tokenizer/opennlp/EnglishTok.bin.gz")) {
      MsgPrinter.printErrorMsg("Could not create tokenizer.");
    }
    //		LingPipe.createTokenizer();

    // create sentence detector
    MsgPrinter.printStatusMsg("Creating sentence detector...");
    if (!OpenNLP.createSentenceDetector(
        dir + "res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz")) {
      MsgPrinter.printErrorMsg("Could not create sentence detector.");
    }
    // the LingPipe sentence detector is created regardless of the OpenNLP result
    LingPipe.createSentenceDetector();

    // create stemmer
    MsgPrinter.printStatusMsg("Creating stemmer...");
    SnowballStemmer.create();

    // create part of speech tagger
    MsgPrinter.printStatusMsg("Creating POS tagger...");
    if (!OpenNLP.createPosTagger(
        dir + "res/nlp/postagger/opennlp/tag.bin.gz", dir + "res/nlp/postagger/opennlp/tagdict")) {
      MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
    }
    //		if (!StanfordPosTagger.init(dir + "res/nlp/postagger/stanford/" +
    //				"wsj3t0-18-bidirectional/train-wsj-0-18.holder"))
    //			MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");

    // create chunker
    MsgPrinter.printStatusMsg("Creating chunker...");
    if (!OpenNLP.createChunker(dir + "res/nlp/phrasechunker/opennlp/EnglishChunk.bin.gz")) {
      MsgPrinter.printErrorMsg("Could not create chunker.");
    }

    // create syntactic parser
    MsgPrinter.printStatusMsg("Creating syntactic parser...");
    //		if (!OpenNLP.createParser(dir + "res/nlp/syntacticparser/opennlp/"))
    //			MsgPrinter.printErrorMsg("Could not create OpenNLP parser.");
    try {
      StanfordParser.initialize();
    } catch (Exception e) {
      // best effort: a missing parser is reported but does not abort initialization
      MsgPrinter.printErrorMsg("Could not create Stanford parser.");
    }

    // create named entity taggers
    MsgPrinter.printStatusMsg("Creating NE taggers...");
    NETagger.loadListTaggers(dir + "res/nlp/netagger/lists/");
    NETagger.loadRegExTaggers(dir + "res/nlp/netagger/patterns.lst");
    MsgPrinter.printStatusMsg("  ...loading models");
    //		if (!NETagger.loadNameFinders(dir + "res/nlp/netagger/opennlp/"))
    //			MsgPrinter.printErrorMsg("Could not create OpenNLP NE tagger.");
    // init() is only attempted if the Stanford NE tagger is not initialized yet
    if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init()) {
      MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
    }
    MsgPrinter.printStatusMsg("  ...done");

    // create linker
    //		MsgPrinter.printStatusMsg("Creating linker...");
    //		if (!OpenNLP.createLinker(dir + "res/nlp/corefresolver/opennlp/"))
    //			MsgPrinter.printErrorMsg("Could not create linker.");

    // create WordNet dictionary
    MsgPrinter.printStatusMsg("Creating WordNet dictionary...");
    if (!WordNet.initialize(dir + "res/ontologies/wordnet/file_properties.xml")) {
      MsgPrinter.printErrorMsg("Could not create WordNet dictionary.");
    }

    // load function words (numbers are excluded)
    MsgPrinter.printStatusMsg("Loading function words...");
    if (!FunctionWords.loadIndex(dir + "res/indices/functionwords_nonumbers")) {
      MsgPrinter.printErrorMsg("Could not load function words.");
    }

    // load prepositions
    MsgPrinter.printStatusMsg("Loading prepositions...");
    if (!Prepositions.loadIndex(dir + "res/indices/prepositions")) {
      MsgPrinter.printErrorMsg("Could not load prepositions.");
    }

    // load irregular verbs
    MsgPrinter.printStatusMsg("Loading irregular verbs...");
    if (!IrregularVerbs.loadVerbs(dir + "res/indices/irregularverbs")) {
      MsgPrinter.printErrorMsg("Could not load irregular verbs.");
    }

    // load word frequencies
    MsgPrinter.printStatusMsg("Loading word frequencies...");
    if (!WordFrequencies.loadIndex(dir + "res/indices/wordfrequencies")) {
      MsgPrinter.printErrorMsg("Could not load word frequencies.");
    }

    // load query reformulators
    MsgPrinter.printStatusMsg("Loading query reformulators...");
    if (!QuestionReformulationG.loadReformulators(dir + "res/reformulations/")) {
      MsgPrinter.printErrorMsg("Could not load query reformulators.");
    }

    // load answer types
    //		MsgPrinter.printStatusMsg("Loading answer types...");
    //		if (!AnswerTypeTester.loadAnswerTypes(dir +
    //				"res/answertypes/patterns/answertypepatterns"))
    //			MsgPrinter.printErrorMsg("Could not load answer types.");

    // load question patterns
    MsgPrinter.printStatusMsg("Loading question patterns...");
    if (!QuestionInterpreter.loadPatterns(dir + "res/patternlearning/questionpatterns/")) {
      MsgPrinter.printErrorMsg("Could not load question patterns.");
    }

    // load answer patterns
    MsgPrinter.printStatusMsg("Loading answer patterns...");
    if (!AnswerPatternFilter.loadPatterns(dir + "res/patternlearning/answerpatterns/")) {
      MsgPrinter.printErrorMsg("Could not load answer patterns.");
    }
  }