/** * Initializes the regular expression based NE taggers. * * @param regExListFileName path and name of the file the names of the patterns in use are found * in */ public static void loadRegExTaggers(String regExListFileName) { if (patterns.length > 0) return; MsgPrinter.printStatusMsg(" ...loading patterns"); ArrayList<String> patternNameList = new ArrayList<String>(); ArrayList<Pattern> patternList = new ArrayList<Pattern>(); ArrayList<Integer> patternMaxTokensList = new ArrayList<Integer>(); ArrayList<String> quantityPatternNameList = new ArrayList<String>(); ArrayList<Pattern> quantityPatternList = new ArrayList<Pattern>(); ArrayList<Integer> quantityPatternMaxTokensList = new ArrayList<Integer>(); ArrayList<String> quantityUnitPatternNameList = new ArrayList<String>(); ArrayList<Pattern> quantityUnitPatternList = new ArrayList<Pattern>(); ArrayList<Integer> quantityUnitPatternMaxTokensList = new ArrayList<Integer>(); try { BufferedReader br = new BufferedReader( new FileReader( regExListFileName)); // new BufferedReader(new // FileReader("./res/nlp/netagger/patterns.lst")); String line; while ((line = br.readLine()) != null) { String neName = "NE" + line; String patternFieldNamePrefix = ""; for (int c = 0; c < line.length(); c++) { char ch = line.charAt(c); if (Character.isUpperCase(ch)) patternFieldNamePrefix += "_" + ch; else patternFieldNamePrefix += Character.toUpperCase(ch); } String regExFieldName = patternFieldNamePrefix; String patternFieldName = patternFieldNamePrefix + "_PATTERN"; String maxTokensFieldName = patternFieldNamePrefix + "_MAX_TOKENS"; try { Field regExField = RegExMatcher.class.getField(regExFieldName); Field patternField = RegExMatcher.class.getField(patternFieldName); Field maxTokensField = RegExMatcher.class.getField(maxTokensFieldName); String regEx = regExField.get(null).toString(); Pattern pattern = ((Pattern) patternField.get(null)); int maxTokens = maxTokensField.getInt(null); boolean isQuantity = 
((regEx.indexOf(RegExMatcher.NUMBER) != -1) && !regEx.equals(RegExMatcher.NUMBER)); if (isQuantity) { try { String unitPatternFieldName = patternFieldNamePrefix + "_UNIT_PATTERN"; String unitMaxTokensFieldName = patternFieldNamePrefix + "_UNIT_MAX_TOKENS"; Field unitPatternField = RegExMatcher.class.getField(unitPatternFieldName); Field unitMaxTokensField = RegExMatcher.class.getField(unitMaxTokensFieldName); Pattern unitPattern = ((Pattern) unitPatternField.get(null)); int unitMaxTokens = unitMaxTokensField.getInt(null); quantityPatternNameList.add(neName); quantityPatternList.add(pattern); quantityPatternMaxTokensList.add(new Integer(maxTokens)); quantityUnitPatternNameList.add(neName); quantityUnitPatternList.add(unitPattern); quantityUnitPatternMaxTokensList.add(new Integer(unitMaxTokens)); } catch (Exception e) { isQuantity = false; } } if (!isQuantity) { patternNameList.add(neName); patternList.add(pattern); patternMaxTokensList.add(new Integer(maxTokens)); } MsgPrinter.printStatusMsg(" ...for " + neName); } catch (Exception e) { MsgPrinter.printErrorMsg(" ...could not add " + neName); } } patternNames = new String[patternNameList.size()]; patterns = new Pattern[patternList.size()]; patternMaxTokens = new int[patternMaxTokensList.size()]; for (int p = 0; p < patternNameList.size(); p++) { patternNames[p] = patternNameList.get(p); patterns[p] = patternList.get(p); patternMaxTokens[p] = patternMaxTokensList.get(p).intValue(); } quantityPatternNames = new String[quantityPatternNameList.size()]; quantityPatterns = new Pattern[quantityPatternList.size()]; quantityUnitPatterns = new Pattern[quantityUnitPatternList.size()]; // quantityPatternMaxTokens = new int[quantityPatternMaxTokensList.size()]; quantityUnitPatternMaxTokens = new int[quantityUnitPatternMaxTokensList.size()]; for (int p = 0; p < quantityPatternNameList.size(); p++) { quantityPatternNames[p] = quantityPatternNameList.get(p); quantityPatterns[p] = quantityPatternList.get(p); quantityUnitPatterns[p] 
= quantityUnitPatternList.get(p); // quantityPatternMaxTokens[p] = quantityPatternMaxTokensList.get(p); quantityUnitPatternMaxTokens[p] = quantityUnitPatternMaxTokensList.get(p); } } catch (IOException e) { e.printStackTrace(); } allPatternNames = new String[patterns.length + 1 + quantityUnitPatterns.length]; for (int i = 0; i < patternNames.length; i++) allPatternNames[i] = patternNames[i]; allPatternNames[patternNames.length] = "NEnumber"; for (int i = 0; i < quantityPatternNames.length; i++) allPatternNames[patternNames.length + i + 1] = quantityPatternNames[i]; }
/** * Prints out an arbitrary error message with a timestamp. * * @param error an error message */ public static synchronized void printErrorMsgTimestamp(String error) { if (errorMsgs) printErrorMsg(error + " (" + getTimestamp() + ")"); }
/**
 * Creates a new instance of Ephyra and initializes the system.
 *
 * <p>For use as an API.
 *
 * <p>Initialization is strictly ordered: NLP components (tokenizer, sentence detector, stemmer,
 * POS tagger, chunker, parser, NE taggers) are created first, then dictionaries and word lists,
 * then query reformulators and question/answer patterns. Failures are reported through
 * {@code MsgPrinter} but do not abort construction.
 *
 * @param dir directory of Ephyra
 */
public OpenEphyra(String dir) {
    this.dir = dir;
    MsgPrinter.printInitializing();

    // create tokenizer
    MsgPrinter.printStatusMsg("Creating tokenizer...");
    if (!OpenNLP.createTokenizer(dir + "res/nlp/tokenizer/opennlp/EnglishTok.bin.gz"))
        MsgPrinter.printErrorMsg("Could not create tokenizer.");
    // LingPipe.createTokenizer();

    // create sentence detector (both the OpenNLP and the LingPipe detectors are set up)
    MsgPrinter.printStatusMsg("Creating sentence detector...");
    if (!OpenNLP.createSentenceDetector(dir + "res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz"))
        MsgPrinter.printErrorMsg("Could not create sentence detector.");
    LingPipe.createSentenceDetector();

    // create stemmer
    MsgPrinter.printStatusMsg("Creating stemmer...");
    SnowballStemmer.create();

    // create part of speech tagger
    MsgPrinter.printStatusMsg("Creating POS tagger...");
    if (!OpenNLP.createPosTagger(
            dir + "res/nlp/postagger/opennlp/tag.bin.gz",
            dir + "res/nlp/postagger/opennlp/tagdict"))
        MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
    // if (!StanfordPosTagger.init(dir + "res/nlp/postagger/stanford/" +
    //         "wsj3t0-18-bidirectional/train-wsj-0-18.holder"))
    //     MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");

    // create chunker
    MsgPrinter.printStatusMsg("Creating chunker...");
    if (!OpenNLP.createChunker(dir + "res/nlp/phrasechunker/opennlp/EnglishChunk.bin.gz"))
        MsgPrinter.printErrorMsg("Could not create chunker.");

    // create syntactic parser
    MsgPrinter.printStatusMsg("Creating syntactic parser...");
    // if (!OpenNLP.createParser(dir + "res/nlp/syntacticparser/opennlp/"))
    //     MsgPrinter.printErrorMsg("Could not create OpenNLP parser.");
    try {
        StanfordParser.initialize();
    } catch (Exception e) {
        // NOTE(review): the exception cause is dropped here; only a generic message is printed
        MsgPrinter.printErrorMsg("Could not create Stanford parser.");
    }

    // create named entity taggers: list-based, regex-based, and model-based (Stanford)
    MsgPrinter.printStatusMsg("Creating NE taggers...");
    NETagger.loadListTaggers(dir + "res/nlp/netagger/lists/");
    NETagger.loadRegExTaggers(dir + "res/nlp/netagger/patterns.lst");
    MsgPrinter.printStatusMsg(" ...loading models");
    // if (!NETagger.loadNameFinders(dir + "res/nlp/netagger/opennlp/"))
    //     MsgPrinter.printErrorMsg("Could not create OpenNLP NE tagger.");
    if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init())
        MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
    MsgPrinter.printStatusMsg(" ...done");

    // create linker
    // MsgPrinter.printStatusMsg("Creating linker...");
    // if (!OpenNLP.createLinker(dir + "res/nlp/corefresolver/opennlp/"))
    //     MsgPrinter.printErrorMsg("Could not create linker.");

    // create WordNet dictionary
    MsgPrinter.printStatusMsg("Creating WordNet dictionary...");
    if (!WordNet.initialize(dir + "res/ontologies/wordnet/file_properties.xml"))
        MsgPrinter.printErrorMsg("Could not create WordNet dictionary.");

    // load function words (numbers are excluded)
    MsgPrinter.printStatusMsg("Loading function verbs...");
    if (!FunctionWords.loadIndex(dir + "res/indices/functionwords_nonumbers"))
        MsgPrinter.printErrorMsg("Could not load function words.");

    // load prepositions
    MsgPrinter.printStatusMsg("Loading prepositions...");
    if (!Prepositions.loadIndex(dir + "res/indices/prepositions"))
        MsgPrinter.printErrorMsg("Could not load prepositions.");

    // load irregular verbs
    MsgPrinter.printStatusMsg("Loading irregular verbs...");
    if (!IrregularVerbs.loadVerbs(dir + "res/indices/irregularverbs"))
        MsgPrinter.printErrorMsg("Could not load irregular verbs.");

    // load word frequencies
    MsgPrinter.printStatusMsg("Loading word frequencies...");
    if (!WordFrequencies.loadIndex(dir + "res/indices/wordfrequencies"))
        MsgPrinter.printErrorMsg("Could not load word frequencies.");

    // load query reformulators
    MsgPrinter.printStatusMsg("Loading query reformulators...");
    if (!QuestionReformulationG.loadReformulators(dir + "res/reformulations/"))
        MsgPrinter.printErrorMsg("Could not load query reformulators.");

    // load answer types
    // MsgPrinter.printStatusMsg("Loading answer types...");
    // if (!AnswerTypeTester.loadAnswerTypes(dir +
    //         "res/answertypes/patterns/answertypepatterns"))
    //     MsgPrinter.printErrorMsg("Could not load answer types.");

    // load question patterns
    MsgPrinter.printStatusMsg("Loading question patterns...");
    if (!QuestionInterpreter.loadPatterns(dir + "res/patternlearning/questionpatterns/"))
        MsgPrinter.printErrorMsg("Could not load question patterns.");

    // load answer patterns
    MsgPrinter.printStatusMsg("Loading answer patterns...");
    if (!AnswerPatternFilter.loadPatterns(dir + "res/patternlearning/answerpatterns/"))
        MsgPrinter.printErrorMsg("Could not load answer patterns.");
}