/**
 * Initializes the list-based NE taggers.
 *
 * <p>Collects all files in <code>listDirectory</code> whose name ends in <code>.lst</code>, sorts
 * them, and records for each one the file name (in <code>lists</code>) and an NE type name built
 * from "NE" plus the file name without its 4-character extension (in <code>listNames</code>).
 * The method returns immediately if <code>lists</code> is already non-empty.
 *
 * <p>If the directory cannot be listed (it does not exist, is not a directory, or an I/O error
 * occurs), an error message is printed and the directory is treated as containing no lists.
 *
 * @param listDirectory path of the directory the list files are located in
 */
public static void loadListTaggers(String listDirectory) {
  // lists have already been loaded
  if (lists.length > 0) {
    return;
  }

  MsgPrinter.printStatusMsg(" ...loading lists");

  File[] listFiles =
      new File(listDirectory)
          .listFiles(
              new FileFilter() {
                public boolean accept(File pathname) {
                  return pathname.getName().endsWith(".lst");
                }
              });

  // File.listFiles returns null (not an empty array) when the path is not a readable directory;
  // sorting or iterating that would throw a NullPointerException
  if (listFiles == null) {
    MsgPrinter.printErrorMsg(" ...could not read list directory " + listDirectory);
    listFiles = new File[0];
  }

  // sort so that the order of the taggers does not depend on the file system
  Arrays.sort(listFiles);

  ArrayList<String> listsList = new ArrayList<String>(listFiles.length);
  ArrayList<String> listNamesList = new ArrayList<String>(listFiles.length);
  for (File list : listFiles) {
    String fileName = list.getName();
    listsList.add(fileName);

    // strip the ".lst" extension to obtain the NE type name
    String listName = fileName.substring(0, fileName.length() - 4);
    listNamesList.add("NE" + listName);
    MsgPrinter.printStatusMsg(" ...for NE" + listName);
  }

  lists = listsList.toArray(new String[listsList.size()]);
  listNames = listNamesList.toArray(new String[listNamesList.size()]);
}
/** * Entry point of Ephyra. Initializes the engine and starts the command line interface. * * @param args command line arguments are ignored */ public static void main(String[] args) { // enable output of status and error messages MsgPrinter.enableStatusMsgs(true); MsgPrinter.enableErrorMsgs(true); // set log file and enable logging Logger.setLogfile("log/OpenEphyra"); Logger.enableLogging(true); // initialize Ephyra and start command line interface (new OpenEphyra()).commandLine(); }
/** * Runs the pipeline and returns an array of up to <code>maxAnswers</code> results that have a * score of at least <code>absThresh</code>. * * @param aq analyzed question * @param maxAnswers maximum number of answers * @param absThresh absolute threshold for scores * @return array of results */ protected Result[] runPipeline(AnalyzedQuestion aq, int maxAnswers, float absThresh) { // query generation MsgPrinter.printGeneratingQueries(); Query[] queries = QueryGeneration.getQueries(aq); // search MsgPrinter.printSearching(); Result[] results = Search.doSearch(queries); // answer selection MsgPrinter.printSelectingAnswers(); results = AnswerSelection.getResults(results, maxAnswers, absThresh); return results; }
/** * A command line interface for Ephyra. * * <p>Repeatedly queries the user for a question, asks the system the question and prints out and * logs the results. * * <p>The command <code>exit</code> can be used to quit the program. */ public void commandLine() { while (true) { // query user for question, quit if user types in "exit" MsgPrinter.printQuestionPrompt(); String question = readLine().trim(); if (question.equalsIgnoreCase("exit")) System.exit(0); // determine question type and extract question string String type; if (question.matches("(?i)" + FACTOID + ":.*+")) { // factoid question type = FACTOID; question = question.split(":", 2)[1].trim(); } else if (question.matches("(?i)" + LIST + ":.*+")) { // list question type = LIST; question = question.split(":", 2)[1].trim(); } else { // question type unspecified type = FACTOID; // default type } // ask question Result[] results = new Result[0]; if (type.equals(FACTOID)) { Logger.logFactoidStart(question); results = askFactoid(question, FACTOID_MAX_ANSWERS, FACTOID_ABS_THRESH); Logger.logResults(results); Logger.logFactoidEnd(); } else if (type.equals(LIST)) { Logger.logListStart(question); results = askList(question, LIST_REL_THRESH); Logger.logResults(results); Logger.logListEnd(); } // print answers MsgPrinter.printAnswers(results); } }
/** * Asks Ephyra a factoid question and returns up to <code>maxAnswers</code> results that have a * score of at least <code>absThresh</code>. * * @param question factoid question * @param maxAnswers maximum number of answers * @param absThresh absolute threshold for scores * @return array of results */ public Result[] askFactoid(String question, int maxAnswers, float absThresh) { // initialize pipeline initFactoid(); // analyze question MsgPrinter.printAnalyzingQuestion(); AnalyzedQuestion aq = QuestionAnalysis.analyze(question); // get answers Result[] results = runPipeline(aq, maxAnswers, absThresh); return results; }
/**
 * Builds one OpenNLP name finder per model file found in <code>dir</code> and records the named
 * entity type of each finder, taken from the part of the model file name before the first dot.
 *
 * <p>The <code>finders</code> and <code>finderNames</code> arrays are replaced before any model
 * is read, so after a failure they hold entries only for the models read up to that point.
 *
 * @param dir directory containing the models for the name finders
 * @return true, iff the name finders were created successfully
 */
public static boolean loadNameFinders(String dir) {
  File[] modelFiles = FileUtils.getFiles(dir);
  int modelCount = modelFiles.length;

  finders = new NameFinder[modelCount];
  finderNames = new String[modelCount];

  try {
    int slot = 0;
    for (File modelFile : modelFiles) {
      MaxentModel model = new SuffixSensitiveGISModelReader(modelFile).getModel();
      finders[slot] = new NameFinder(model);

      // the NE type is the model file name up to its first dot
      String typeName = modelFile.getName().split("\\.")[0];
      finderNames[slot] = typeName;
      MsgPrinter.printStatusMsg(" ...for " + typeName);

      slot++;
    }
    return true;
  } catch (IOException e) {
    // a model could not be read
    return false;
  }
}
/** * Initializes the regular expression based NE taggers. * * @param regExListFileName path and name of the file the names of the patterns in use are found * in */ public static void loadRegExTaggers(String regExListFileName) { if (patterns.length > 0) return; MsgPrinter.printStatusMsg(" ...loading patterns"); ArrayList<String> patternNameList = new ArrayList<String>(); ArrayList<Pattern> patternList = new ArrayList<Pattern>(); ArrayList<Integer> patternMaxTokensList = new ArrayList<Integer>(); ArrayList<String> quantityPatternNameList = new ArrayList<String>(); ArrayList<Pattern> quantityPatternList = new ArrayList<Pattern>(); ArrayList<Integer> quantityPatternMaxTokensList = new ArrayList<Integer>(); ArrayList<String> quantityUnitPatternNameList = new ArrayList<String>(); ArrayList<Pattern> quantityUnitPatternList = new ArrayList<Pattern>(); ArrayList<Integer> quantityUnitPatternMaxTokensList = new ArrayList<Integer>(); try { BufferedReader br = new BufferedReader( new FileReader( regExListFileName)); // new BufferedReader(new // FileReader("./res/nlp/netagger/patterns.lst")); String line; while ((line = br.readLine()) != null) { String neName = "NE" + line; String patternFieldNamePrefix = ""; for (int c = 0; c < line.length(); c++) { char ch = line.charAt(c); if (Character.isUpperCase(ch)) patternFieldNamePrefix += "_" + ch; else patternFieldNamePrefix += Character.toUpperCase(ch); } String regExFieldName = patternFieldNamePrefix; String patternFieldName = patternFieldNamePrefix + "_PATTERN"; String maxTokensFieldName = patternFieldNamePrefix + "_MAX_TOKENS"; try { Field regExField = RegExMatcher.class.getField(regExFieldName); Field patternField = RegExMatcher.class.getField(patternFieldName); Field maxTokensField = RegExMatcher.class.getField(maxTokensFieldName); String regEx = regExField.get(null).toString(); Pattern pattern = ((Pattern) patternField.get(null)); int maxTokens = maxTokensField.getInt(null); boolean isQuantity = 
((regEx.indexOf(RegExMatcher.NUMBER) != -1) && !regEx.equals(RegExMatcher.NUMBER)); if (isQuantity) { try { String unitPatternFieldName = patternFieldNamePrefix + "_UNIT_PATTERN"; String unitMaxTokensFieldName = patternFieldNamePrefix + "_UNIT_MAX_TOKENS"; Field unitPatternField = RegExMatcher.class.getField(unitPatternFieldName); Field unitMaxTokensField = RegExMatcher.class.getField(unitMaxTokensFieldName); Pattern unitPattern = ((Pattern) unitPatternField.get(null)); int unitMaxTokens = unitMaxTokensField.getInt(null); quantityPatternNameList.add(neName); quantityPatternList.add(pattern); quantityPatternMaxTokensList.add(new Integer(maxTokens)); quantityUnitPatternNameList.add(neName); quantityUnitPatternList.add(unitPattern); quantityUnitPatternMaxTokensList.add(new Integer(unitMaxTokens)); } catch (Exception e) { isQuantity = false; } } if (!isQuantity) { patternNameList.add(neName); patternList.add(pattern); patternMaxTokensList.add(new Integer(maxTokens)); } MsgPrinter.printStatusMsg(" ...for " + neName); } catch (Exception e) { MsgPrinter.printErrorMsg(" ...could not add " + neName); } } patternNames = new String[patternNameList.size()]; patterns = new Pattern[patternList.size()]; patternMaxTokens = new int[patternMaxTokensList.size()]; for (int p = 0; p < patternNameList.size(); p++) { patternNames[p] = patternNameList.get(p); patterns[p] = patternList.get(p); patternMaxTokens[p] = patternMaxTokensList.get(p).intValue(); } quantityPatternNames = new String[quantityPatternNameList.size()]; quantityPatterns = new Pattern[quantityPatternList.size()]; quantityUnitPatterns = new Pattern[quantityUnitPatternList.size()]; // quantityPatternMaxTokens = new int[quantityPatternMaxTokensList.size()]; quantityUnitPatternMaxTokens = new int[quantityUnitPatternMaxTokensList.size()]; for (int p = 0; p < quantityPatternNameList.size(); p++) { quantityPatternNames[p] = quantityPatternNameList.get(p); quantityPatterns[p] = quantityPatternList.get(p); quantityUnitPatterns[p] 
= quantityUnitPatternList.get(p); // quantityPatternMaxTokens[p] = quantityPatternMaxTokensList.get(p); quantityUnitPatternMaxTokens[p] = quantityUnitPatternMaxTokensList.get(p); } } catch (IOException e) { e.printStackTrace(); } allPatternNames = new String[patterns.length + 1 + quantityUnitPatterns.length]; for (int i = 0; i < patternNames.length; i++) allPatternNames[i] = patternNames[i]; allPatternNames[patternNames.length] = "NEnumber"; for (int i = 0; i < quantityPatternNames.length; i++) allPatternNames[patternNames.length + i + 1] = quantityPatternNames[i]; }
/**
 * Creates a new instance of Ephyra and initializes the system.
 *
 * <p>For use as an API.
 *
 * <p>All resources are located by appending a relative path such as <code>res/nlp/...</code>
 * directly to <code>dir</code>, so <code>dir</code> must either be empty or end with a path
 * separator. A component that fails to initialize is reported through
 * <code>MsgPrinter.printErrorMsg</code> and initialization continues with the next component.
 *
 * @param dir directory of Ephyra
 */
public OpenEphyra(String dir) {
  this.dir = dir;

  MsgPrinter.printInitializing();

  // create tokenizer
  MsgPrinter.printStatusMsg("Creating tokenizer...");
  if (!OpenNLP.createTokenizer(dir + "res/nlp/tokenizer/opennlp/EnglishTok.bin.gz")) {
    MsgPrinter.printErrorMsg("Could not create tokenizer.");
  }
  // LingPipe.createTokenizer();

  // create sentence detector
  MsgPrinter.printStatusMsg("Creating sentence detector...");
  if (!OpenNLP.createSentenceDetector(dir + "res/nlp/sentencedetector/opennlp/EnglishSD.bin.gz")) {
    MsgPrinter.printErrorMsg("Could not create sentence detector.");
  }
  LingPipe.createSentenceDetector();

  // create stemmer
  MsgPrinter.printStatusMsg("Creating stemmer...");
  SnowballStemmer.create();

  // create part of speech tagger
  MsgPrinter.printStatusMsg("Creating POS tagger...");
  if (!OpenNLP.createPosTagger(
      dir + "res/nlp/postagger/opennlp/tag.bin.gz", dir + "res/nlp/postagger/opennlp/tagdict")) {
    MsgPrinter.printErrorMsg("Could not create OpenNLP POS tagger.");
  }
  // if (!StanfordPosTagger.init(dir + "res/nlp/postagger/stanford/" +
  //     "wsj3t0-18-bidirectional/train-wsj-0-18.holder"))
  //   MsgPrinter.printErrorMsg("Could not create Stanford POS tagger.");

  // create chunker
  MsgPrinter.printStatusMsg("Creating chunker...");
  if (!OpenNLP.createChunker(dir + "res/nlp/phrasechunker/opennlp/EnglishChunk.bin.gz")) {
    MsgPrinter.printErrorMsg("Could not create chunker.");
  }

  // create syntactic parser
  MsgPrinter.printStatusMsg("Creating syntactic parser...");
  // if (!OpenNLP.createParser(dir + "res/nlp/syntacticparser/opennlp/"))
  //   MsgPrinter.printErrorMsg("Could not create OpenNLP parser.");
  try {
    StanfordParser.initialize();
  } catch (Exception e) {
    MsgPrinter.printErrorMsg("Could not create Stanford parser.");
  }

  // create named entity taggers
  MsgPrinter.printStatusMsg("Creating NE taggers...");
  NETagger.loadListTaggers(dir + "res/nlp/netagger/lists/");
  NETagger.loadRegExTaggers(dir + "res/nlp/netagger/patterns.lst");
  MsgPrinter.printStatusMsg(" ...loading models");
  // if (!NETagger.loadNameFinders(dir + "res/nlp/netagger/opennlp/"))
  //   MsgPrinter.printErrorMsg("Could not create OpenNLP NE tagger.");
  // init() is only attempted when the Stanford NE tagger has not been initialized yet
  if (!StanfordNeTagger.isInitialized() && !StanfordNeTagger.init()) {
    MsgPrinter.printErrorMsg("Could not create Stanford NE tagger.");
  }
  MsgPrinter.printStatusMsg(" ...done");

  // create linker
  // MsgPrinter.printStatusMsg("Creating linker...");
  // if (!OpenNLP.createLinker(dir + "res/nlp/corefresolver/opennlp/"))
  //   MsgPrinter.printErrorMsg("Could not create linker.");

  // create WordNet dictionary
  MsgPrinter.printStatusMsg("Creating WordNet dictionary...");
  if (!WordNet.initialize(dir + "res/ontologies/wordnet/file_properties.xml")) {
    MsgPrinter.printErrorMsg("Could not create WordNet dictionary.");
  }

  // load function words (numbers are excluded)
  // the status message previously said "function verbs", contradicting the error message below
  MsgPrinter.printStatusMsg("Loading function words...");
  if (!FunctionWords.loadIndex(dir + "res/indices/functionwords_nonumbers")) {
    MsgPrinter.printErrorMsg("Could not load function words.");
  }

  // load prepositions
  MsgPrinter.printStatusMsg("Loading prepositions...");
  if (!Prepositions.loadIndex(dir + "res/indices/prepositions")) {
    MsgPrinter.printErrorMsg("Could not load prepositions.");
  }

  // load irregular verbs
  MsgPrinter.printStatusMsg("Loading irregular verbs...");
  if (!IrregularVerbs.loadVerbs(dir + "res/indices/irregularverbs")) {
    MsgPrinter.printErrorMsg("Could not load irregular verbs.");
  }

  // load word frequencies
  MsgPrinter.printStatusMsg("Loading word frequencies...");
  if (!WordFrequencies.loadIndex(dir + "res/indices/wordfrequencies")) {
    MsgPrinter.printErrorMsg("Could not load word frequencies.");
  }

  // load query reformulators
  MsgPrinter.printStatusMsg("Loading query reformulators...");
  if (!QuestionReformulationG.loadReformulators(dir + "res/reformulations/")) {
    MsgPrinter.printErrorMsg("Could not load query reformulators.");
  }

  // load answer types
  // MsgPrinter.printStatusMsg("Loading answer types...");
  // if (!AnswerTypeTester.loadAnswerTypes(dir +
  //     "res/answertypes/patterns/answertypepatterns"))
  //   MsgPrinter.printErrorMsg("Could not load answer types.");

  // load question patterns
  MsgPrinter.printStatusMsg("Loading question patterns...");
  if (!QuestionInterpreter.loadPatterns(dir + "res/patternlearning/questionpatterns/")) {
    MsgPrinter.printErrorMsg("Could not load question patterns.");
  }

  // load answer patterns
  MsgPrinter.printStatusMsg("Loading answer patterns...");
  if (!AnswerPatternFilter.loadPatterns(dir + "res/patternlearning/answerpatterns/")) {
    MsgPrinter.printErrorMsg("Could not load answer patterns.");
  }
}