/** * Initialise the ANNIE system. This creates a "corpus pipeline" application that can be used to * run sets of documents through the extraction system. */ public void initAnnie() throws GateException, IOException { Out.prln("Initialising ANNIE..."); // load the ANNIE application from the saved state in plugins/ANNIE File pluginsHome = Gate.getPluginsHome(); File anniePlugin = new File(pluginsHome, "ANNIE"); File annieGapp = new File(anniePlugin, "ANNIE_with_defaults.gapp"); annieController = (CorpusController) PersistenceManager.loadObjectFromFile(annieGapp); Out.prln("...ANNIE loaded"); } // initAnnie()
/**
 * Prints every Unicode property value known for the given Unicode version, one per line, each
 * followed by its aliases (separated by {@code ", "}) when it has any.
 *
 * <p>The data is read reflectively from the public static {@code propertyValues} and
 * {@code propertyValueAliases} fields of the class {@code jflex.unicode.data.Unicode_<major>_<minor>}.
 * A missing minor version is treated as {@code 0}; a third version component is accepted but
 * ignored. {@code propertyValueAliases} is read as a flat array of (alias, value) pairs. Values
 * and aliases are printed in their natural sorted order.
 *
 * @param unicodeVersion The Unicode version to print property values and aliases for, in the
 *     form {@code major[.minor[.update]]}
 * @throws UnicodeProperties.UnsupportedUnicodeVersionException if unicodeVersion is malformed or
 *     its data class or fields cannot be read
 */
private static void printUnicodePropertyValuesAndAliases(String unicodeVersion)
    throws UnicodeProperties.UnsupportedUnicodeVersionException {
  Matcher version = Pattern.compile("(\\d+)(?:\\.(\\d+))?(?:\\.\\d+)?").matcher(unicodeVersion);
  if (!version.matches()) {
    throw new UnicodeProperties.UnsupportedUnicodeVersionException();
  }

  // "6" maps to Unicode_6_0, "6.1" and "6.1.0" both map to Unicode_6_1.
  String minor = version.group(2);
  String dataClassName =
      "jflex.unicode.data.Unicode_" + version.group(1) + "_" + (minor == null ? "0" : minor);

  String[] values;
  String[] aliasPairs;
  try {
    Class<?> dataClass = Class.forName(dataClassName);
    values = (String[]) dataClass.getField("propertyValues").get(null);
    aliasPairs = (String[]) dataClass.getField("propertyValueAliases").get(null);
  } catch (Exception e) {
    // Any failure to locate or read the data is reported as an unsupported version.
    throw new UnicodeProperties.UnsupportedUnicodeVersionException();
  }

  // Start with every known value mapped to an empty alias set ...
  SortedMap<String, SortedSet<String>> aliasesByValue =
      new TreeMap<String, SortedSet<String>>();
  for (int v = 0; v < values.length; v++) {
    aliasesByValue.put(values[v], new TreeSet<String>());
  }

  // ... then attach the aliases; values that only appear in the alias table are added too.
  for (int p = 0; p < aliasPairs.length; p += 2) {
    String target = aliasPairs[p + 1];
    SortedSet<String> known = aliasesByValue.get(target);
    if (known == null) {
      known = new TreeSet<String>();
      aliasesByValue.put(target, known);
    }
    known.add(aliasPairs[p]);
  }

  for (Map.Entry<String, SortedSet<String>> line : aliasesByValue.entrySet()) {
    Out.print(line.getKey());
    for (String alias : line.getValue()) {
      Out.print(", " + alias);
    }
    Out.println("");
  }
}
/**
 * Command-line entry point: runs the generator on the given arguments.
 *
 * <p>The arguments are handed to {@code generate(argv)} unchanged. If generation fails with a
 * {@code GeneratorException}, the collected statistics are printed and the JVM exits with status
 * 1. A {@code SilentExit} also ends the JVM with status 1, but prints nothing further. On success
 * the method simply returns.
 *
 * @param argv the commandline.
 */
public static void main(String argv[]) {
  boolean terminate = false;

  try {
    generate(argv);
  } catch (GeneratorException e) {
    Out.statistics();
    terminate = true;
  } catch (SilentExit e) {
    terminate = true;
  }

  if (terminate) {
    System.exit(1);
  }
}
/** * Stores a new macro and its definition. * * @param name the name of the new macro * @param definition the definition of the new macro * @return <code>true</code>, iff the macro name has not been stored before. */ public boolean insert(String name, RegExp definition) { if (Options.DEBUG) Out.debug( "inserting macro " + name + " with definition :" + Out.NL + definition); //$NON-NLS-1$ //$NON-NLS-2$ used.put(name, Boolean.FALSE); return macros.put(name, definition) == null; }
/** * Generates a scanner for the specified input file. * * @param inputFile a file containing a lexical specification to generate a scanner for. */ public static void generate(File inputFile) { Out.resetCounters(); Timer totalTime = new Timer(); Timer time = new Timer(); LexScan scanner = null; LexParse parser = null; FileReader inputReader = null; totalTime.start(); try { Out.println(ErrorMessages.READING, inputFile.toString()); inputReader = new FileReader(inputFile); scanner = new LexScan(inputReader); scanner.setFile(inputFile); parser = new LexParse(scanner); } catch (FileNotFoundException e) { Out.error(ErrorMessages.CANNOT_OPEN, inputFile.toString()); throw new GeneratorException(); } try { NFA nfa = (NFA) parser.parse().value; Out.checkErrors(); if (Options.dump) Out.dump(ErrorMessages.get(ErrorMessages.NFA_IS) + Out.NL + nfa + Out.NL); if (Options.dot) nfa.writeDot(Emitter.normalize("nfa.dot", null)); // $NON-NLS-1$ Out.println(ErrorMessages.NFA_STATES, nfa.numStates); time.start(); DFA dfa = nfa.getDFA(); time.stop(); Out.time(ErrorMessages.DFA_TOOK, time); dfa.checkActions(scanner, parser); nfa = null; if (Options.dump) Out.dump(ErrorMessages.get(ErrorMessages.DFA_IS) + Out.NL + dfa + Out.NL); if (Options.dot) dfa.writeDot(Emitter.normalize("dfa-big.dot", null)); // $NON-NLS-1$ Out.checkErrors(); time.start(); dfa.minimize(); time.stop(); Out.time(ErrorMessages.MIN_TOOK, time); if (Options.dump) Out.dump(ErrorMessages.get(ErrorMessages.MIN_DFA_IS) + Out.NL + dfa); if (Options.dot) dfa.writeDot(Emitter.normalize("dfa-min.dot", null)); // $NON-NLS-1$ time.start(); Emitter e = new Emitter(inputFile, parser, dfa); e.emit(); time.stop(); Out.time(ErrorMessages.WRITE_TOOK, time); totalTime.stop(); Out.time(ErrorMessages.TOTAL_TIME, totalTime); } catch (ScannerException e) { Out.error(e.file, e.message, e.line, e.column); throw new GeneratorException(); } catch (MacroException e) { Out.error(e.getMessage()); throw new GeneratorException(); } catch (IOException 
e) { Out.error(ErrorMessages.IO_ERROR, e.toString()); throw new GeneratorException(); } catch (OutOfMemoryError e) { Out.error(ErrorMessages.OUT_OF_MEMORY); throw new GeneratorException(); } catch (GeneratorException e) { throw new GeneratorException(); } catch (Exception e) { e.printStackTrace(); throw new GeneratorException(); } }
public static void printUsage() { Out.println(""); // $NON-NLS-1$ Out.println("Usage: jflex <options> <input-files>"); Out.println(""); Out.println("Where <options> can be one or more of"); Out.println("-d <directory> write generated file to <directory>"); Out.println("--skel <file> use external skeleton <file>"); Out.println("--switch (DEPRECATED - will be removed in JFlex 1.6)"); Out.println("--table (DEPRECATED - will be removed in JFlex 1.6)"); Out.println("--pack set default code generation method (default)"); Out.println("--jlex strict JLex compatibility"); Out.println("--legacydot dot (.) metachar matches [^\\n] instead of"); Out.println(" [^\\n\\r\\u000B\\u000C\\u0085\\u2028\\u2029]"); Out.println("--inputstreamctor include a scanner constructor taking InputStream (default)"); Out.println("--noinputstreamctor don't include a scanner constructor taking InputStream"); Out.println("--nomin skip minimization step"); Out.println("--nobak don't create backup files"); Out.println("--dump display transition tables"); Out.println("--dot write graphviz .dot files for the generated automata (alpha)"); Out.println("--verbose"); Out.println("-v display generation progress messages (default)"); Out.println("--quiet"); Out.println("-q display errors only"); Out.println("--time display generation time statistics"); Out.println("--version print the version number of this copy of jflex"); Out.println("--info print system + JDK information"); Out.println("--uniprops <ver> print all supported properties for Unicode version <ver>"); Out.println("--help"); Out.println("-h print this message"); Out.println(""); Out.println(ErrorMessages.THIS_IS_JFLEX, version); Out.println("Have a nice day!"); }
public static List<File> parseOptions(String argv[]) throws SilentExit { List<File> files = new ArrayList<File>(); for (int i = 0; i < argv.length; i++) { if (argv[i].equals("-d") || argv[i].equals("--outdir")) { // $NON-NLS-1$ //$NON-NLS-2$ if (++i >= argv.length) { Out.error(ErrorMessages.NO_DIRECTORY); throw new GeneratorException(); } Options.setDir(argv[i]); continue; } if (argv[i].equals("--skel") || argv[i].equals("-skel")) { // $NON-NLS-1$ //$NON-NLS-2$ if (++i >= argv.length) { Out.error(ErrorMessages.NO_SKEL_FILE); throw new GeneratorException(); } Options.setSkeleton(new File(argv[i])); continue; } if (argv[i].equals("-jlex") || argv[i].equals("--jlex")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.jlex = true; continue; } if (argv[i].equals("-v") || argv[i].equals("--verbose") || argv[i].equals("-verbose")) { // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ Options.verbose = true; Options.progress = true; continue; } if (argv[i].equals("-q") || argv[i].equals("--quiet") || argv[i].equals("-quiet")) { // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ Options.verbose = false; Options.progress = false; continue; } if (argv[i].equals("--dump") || argv[i].equals("-dump")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.dump = true; continue; } if (argv[i].equals("--time") || argv[i].equals("-time")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.time = true; continue; } if (argv[i].equals("--version") || argv[i].equals("-version")) { // $NON-NLS-1$ //$NON-NLS-2$ Out.println(ErrorMessages.THIS_IS_JFLEX, version); throw new SilentExit(); } if (argv[i].equals("--dot") || argv[i].equals("-dot")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.dot = true; continue; } if (argv[i].equals("--help") || argv[i].equals("-h") || argv[i].equals("/h")) { // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ printUsage(); throw new SilentExit(); } if (argv[i].equals("--info") || argv[i].equals("-info")) { // $NON-NLS-1$ //$NON-NLS-2$ Out.printSystemInfo(); throw new SilentExit(); } if (argv[i].equals("--nomin") || 
argv[i].equals("-nomin")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.no_minimize = true; continue; } if (argv[i].equals("--pack") || argv[i].equals("-pack")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.gen_method = Options.PACK; continue; } if (argv[i].equals("--table") || argv[i].equals("-table")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.gen_method = Options.TABLE; continue; } if (argv[i].equals("--switch") || argv[i].equals("-switch")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.gen_method = Options.SWITCH; continue; } if (argv[i].equals("--nobak") || argv[i].equals("-nobak")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.no_backup = true; continue; } if (argv[i].equals("--legacydot") || argv[i].equals("-legacydot")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.legacy_dot = true; continue; } // TODO: In the JFlex version after 1.6, --inputstreamctor will be removed. if (argv[i].equals("--inputstreamctor") || argv[i].equals("-inputstreamctor")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.emitInputStreamCtor = true; continue; } // TODO: In the JFlex version after 1.6, --noinputstreamctor will be removed. 
if (argv[i].equals("--noinputstreamctor") || argv[i].equals("-noinputstreamctor")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.emitInputStreamCtor = false; continue; } if (argv[i].equals("--uniprops") || argv[i].equals("-uniprops")) { // $NON-NLS-1$ //$NON-NLS-2$ if (++i >= argv.length) { Out.error( ErrorMessages.PROPS_ARG_REQUIRES_UNICODE_VERSION, UnicodeProperties.UNICODE_VERSIONS); throw new GeneratorException(); } String unicodeVersion = argv[i]; try { printUnicodePropertyValuesAndAliases(unicodeVersion); } catch (UnicodeProperties.UnsupportedUnicodeVersionException e) { Out.error( ErrorMessages.UNSUPPORTED_UNICODE_VERSION_SUPPORTED_ARE, UnicodeProperties.UNICODE_VERSIONS); throw new GeneratorException(); } throw new SilentExit(); } if (argv[i].startsWith("-")) { // $NON-NLS-1$ Out.error(ErrorMessages.UNKNOWN_COMMANDLINE, argv[i]); printUsage(); throw new SilentExit(); } // if argv[i] is not an option, try to read it as file File f = new File(argv[i]); if (f.isFile() && f.canRead()) files.add(f); else { Out.error("Sorry, couldn't open \"" + f + "\""); // $NON-NLS-2$ throw new GeneratorException(); } } return files; }
/** * Run from the command-line, with a list of URLs as argument. * * <p><B>NOTE:</B><br> * This code will run with all the documents in memory - if you want to unload each from memory * after use, add code to store the corpus in a DataStore. */ public static void main(String args[]) throws GateException, IOException { // initialise the GATE library Out.prln("Initialising GATE..."); Gate.init(); Out.prln("...GATE initialised"); // initialise ANNIE (this may take several minutes) StandAloneAnnie annie = new StandAloneAnnie(); annie.initAnnie(); // create a GATE corpus and add a document for each command-line // argument Corpus corpus = Factory.newCorpus("StandAloneAnnie corpus"); for (int i = 0; i < args.length; i++) { URL u = new URL(args[i]); FeatureMap params = Factory.newFeatureMap(); params.put("sourceUrl", u); params.put("preserveOriginalContent", new Boolean(true)); params.put("collectRepositioningInfo", new Boolean(true)); Out.prln("Creating doc for " + u); Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params); corpus.add(doc); } // for each of args // tell the pipeline about the corpus and run it annie.setCorpus(corpus); annie.execute(); // for each document, get an XML document with the // person and location names added Iterator iter = corpus.iterator(); int count = 0; String startTagPart_1 = "<span GateID=\""; String startTagPart_2 = "\" title=\""; String startTagPart_3 = "\" style=\"background:Red;\">"; String endTag = "</span>"; while (iter.hasNext()) { Document doc = (Document) iter.next(); AnnotationSet defaultAnnotSet = doc.getAnnotations(); Set annotTypesRequired = new HashSet(); annotTypesRequired.add("Person"); annotTypesRequired.add("Location"); Set<Annotation> peopleAndPlaces = new HashSet<Annotation>(defaultAnnotSet.get(annotTypesRequired)); FeatureMap features = doc.getFeatures(); String originalContent = (String) features.get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME); RepositioningInfo info = 
(RepositioningInfo) features.get(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME); ++count; File file = new File("StANNIE_" + count + ".HTML"); Out.prln("File name: '" + file.getAbsolutePath() + "'"); if (originalContent != null && info != null) { Out.prln("OrigContent and reposInfo existing. Generate file..."); Iterator it = peopleAndPlaces.iterator(); Annotation currAnnot; SortedAnnotationList sortedAnnotations = new SortedAnnotationList(); while (it.hasNext()) { currAnnot = (Annotation) it.next(); sortedAnnotations.addSortedExclusive(currAnnot); } // while StringBuffer editableContent = new StringBuffer(originalContent); long insertPositionEnd; long insertPositionStart; // insert anotation tags backward Out.prln("Unsorted annotations count: " + peopleAndPlaces.size()); Out.prln("Sorted annotations count: " + sortedAnnotations.size()); for (int i = sortedAnnotations.size() - 1; i >= 0; --i) { currAnnot = (Annotation) sortedAnnotations.get(i); insertPositionStart = currAnnot.getStartNode().getOffset().longValue(); insertPositionStart = info.getOriginalPos(insertPositionStart); insertPositionEnd = currAnnot.getEndNode().getOffset().longValue(); insertPositionEnd = info.getOriginalPos(insertPositionEnd, true); if (insertPositionEnd != -1 && insertPositionStart != -1) { editableContent.insert((int) insertPositionEnd, endTag); editableContent.insert((int) insertPositionStart, startTagPart_3); editableContent.insert((int) insertPositionStart, currAnnot.getType()); editableContent.insert((int) insertPositionStart, startTagPart_2); editableContent.insert((int) insertPositionStart, currAnnot.getId().toString()); editableContent.insert((int) insertPositionStart, startTagPart_1); } // if } // for FileWriter writer = new FileWriter(file); writer.write(editableContent.toString()); writer.close(); } // if - should generate else if (originalContent != null) { Out.prln("OrigContent existing. 
Generate file..."); Iterator it = peopleAndPlaces.iterator(); Annotation currAnnot; SortedAnnotationList sortedAnnotations = new SortedAnnotationList(); while (it.hasNext()) { currAnnot = (Annotation) it.next(); sortedAnnotations.addSortedExclusive(currAnnot); } // while StringBuffer editableContent = new StringBuffer(originalContent); long insertPositionEnd; long insertPositionStart; // insert anotation tags backward Out.prln("Unsorted annotations count: " + peopleAndPlaces.size()); Out.prln("Sorted annotations count: " + sortedAnnotations.size()); for (int i = sortedAnnotations.size() - 1; i >= 0; --i) { currAnnot = (Annotation) sortedAnnotations.get(i); insertPositionStart = currAnnot.getStartNode().getOffset().longValue(); insertPositionEnd = currAnnot.getEndNode().getOffset().longValue(); if (insertPositionEnd != -1 && insertPositionStart != -1) { editableContent.insert((int) insertPositionEnd, endTag); editableContent.insert((int) insertPositionStart, startTagPart_3); editableContent.insert((int) insertPositionStart, currAnnot.getType()); editableContent.insert((int) insertPositionStart, startTagPart_2); editableContent.insert((int) insertPositionStart, currAnnot.getId().toString()); editableContent.insert((int) insertPositionStart, startTagPart_1); } // if } // for FileWriter writer = new FileWriter(file); writer.write(editableContent.toString()); writer.close(); } else { Out.prln("Content : " + originalContent); Out.prln("Repositioning: " + info); } String xmlDocument = doc.toXml(peopleAndPlaces, false); String fileName = new String("StANNIE_toXML_" + count + ".HTML"); FileWriter writer = new FileWriter(fileName); writer.write(xmlDocument); writer.close(); } // for each doc } // main
/**
 * Runs the loaded ANNIE application by executing {@code annieController}, printing a message
 * before and after the run.
 *
 * @throws GateException declared for the controller execution
 */
public void execute() throws GateException {
  Out.prln("Running ANNIE...");

  annieController.execute();

  Out.prln("...ANNIE complete");
} // execute()
public static void printUsage() { Out.println(""); // $NON-NLS-1$ Out.println("Usage: jflex <options> <input-files>"); Out.println(""); Out.println("Where <options> can be one or more of"); Out.println("-d <directory> write generated file to <directory>"); Out.println("--skel <file> use external skeleton <file>"); Out.println("--switch"); Out.println("--table"); Out.println("--pack set default code generation method"); Out.println("--jlex strict JLex compatibility"); Out.println("--nomin skip minimization step"); Out.println("--nobak don't create backup files"); Out.println("--dump display transition tables"); Out.println("--dot write graphviz .dot files for the generated automata (alpha)"); Out.println("--verbose"); Out.println("-v display generation progress messages (default)"); Out.println("--quiet"); Out.println("-q display errors only"); Out.println("--time display generation time statistics"); Out.println("--version print the version number of this copy of jflex"); Out.println("--info print system + JDK information"); Out.println("--help"); Out.println("-h print this message"); Out.println(""); Out.println(ErrorMessages.THIS_IS_JFLEX, version); Out.println("Have a nice day!"); }
public static Vector parseOptions(String argv[]) throws SilentExit { Vector files = new Vector(); for (int i = 0; i < argv.length; i++) { if (argv[i].equals("-d") || argv[i].equals("--outdir")) { // $NON-NLS-1$ //$NON-NLS-2$ if (++i >= argv.length) { Out.error(ErrorMessages.NO_DIRECTORY); throw new GeneratorException(); } Options.setDir(argv[i]); continue; } if (argv[i].equals("--skel") || argv[i].equals("-skel")) { // $NON-NLS-1$ //$NON-NLS-2$ if (++i >= argv.length) { Out.error(ErrorMessages.NO_SKEL_FILE); throw new GeneratorException(); } Options.setSkeleton(new File(argv[i])); continue; } if (argv[i].equals("-jlex") || argv[i].equals("--jlex")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.jlex = true; continue; } if (argv[i].equals("-v") || argv[i].equals("--verbose") || argv[i].equals("-verbose")) { // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ Options.verbose = true; Options.progress = true; continue; } if (argv[i].equals("-q") || argv[i].equals("--quiet") || argv[i].equals("-quiet")) { // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ Options.verbose = false; Options.progress = false; continue; } if (argv[i].equals("--dump") || argv[i].equals("-dump")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.dump = true; continue; } if (argv[i].equals("--time") || argv[i].equals("-time")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.time = true; continue; } if (argv[i].equals("--version") || argv[i].equals("-version")) { // $NON-NLS-1$ //$NON-NLS-2$ Out.println(ErrorMessages.THIS_IS_JFLEX, version); throw new SilentExit(); } if (argv[i].equals("--dot") || argv[i].equals("-dot")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.dot = true; continue; } if (argv[i].equals("--help") || argv[i].equals("-h") || argv[i].equals("/h")) { // $NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ printUsage(); throw new SilentExit(); } if (argv[i].equals("--info") || argv[i].equals("-info")) { // $NON-NLS-1$ //$NON-NLS-2$ Out.printSystemInfo(); throw new SilentExit(); } if (argv[i].equals("--nomin") || argv[i].equals("-nomin")) { // 
$NON-NLS-1$ //$NON-NLS-2$ Options.no_minimize = true; continue; } if (argv[i].equals("--pack") || argv[i].equals("-pack")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.gen_method = Options.PACK; continue; } if (argv[i].equals("--table") || argv[i].equals("-table")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.gen_method = Options.TABLE; continue; } if (argv[i].equals("--switch") || argv[i].equals("-switch")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.gen_method = Options.SWITCH; continue; } if (argv[i].equals("--nobak") || argv[i].equals("-nobak")) { // $NON-NLS-1$ //$NON-NLS-2$ Options.no_backup = true; continue; } if (argv[i].startsWith("-")) { // $NON-NLS-1$ Out.error(ErrorMessages.UNKNOWN_COMMANDLINE, argv[i]); printUsage(); throw new SilentExit(); } // if argv[i] is not an option, try to read it as file File f = new File(argv[i]); if (f.isFile() && f.canRead()) files.addElement(f); else { Out.error("Sorry, couldn't open \"" + f + "\""); // $NON-NLS-2$ throw new GeneratorException(); } } return files; }