/**
 * Entry point: converts the APF files named in a file list into XML files containing in-line
 * markup for coreference. Command-line arguments, in order:
 *
 * <ol>
 *   <li>year: 2002, 2003, 2004 or 2005 -- the year the APF file was created, which selects the
 *       format flags and the APF file extension
 *   <li>ACEdir: the directory containing the text and APF files
 *   <li>corefDir: the directory for the in-line coreference files
 *   <li>filelist: a file containing a list of document names
 *   <li>showTypes: (optional) when a fifth argument is present, entity type and subtype
 *       information is included with each mention
 * </ol>
 *
 * <p>A wrong argument count or an unrecognized year prints a message to standard error and exits
 * with status 1.
 *
 * @param args the command-line arguments described above
 */
public static void main(String[] args) throws IOException {
  final int argCount = args.length;
  if (argCount != 4 && argCount != 5) {
    System.err.println("APFtoCorefXML requires 4 or 5 arguments:");
    System.err.println(" year apf-directory output-directory filelist [showTypes]");
    System.exit(1);
  }

  // Defaults; the year dispatch below overrides them where needed.
  apfExtension = ".apf.xml";
  AceDocument.ace2004 = false;
  AceDocument.ace2005 = false;

  final String requestedYear = args[0];
  switch (requestedYear) {
    case "2002":
      apfExtension = ".sgm.tmx.rdc.xml";
      break;
    case "2003":
      // 2003 uses the defaults unchanged
      break;
    case "2004":
      AceDocument.ace2004 = true;
      break;
    case "2005":
      AceDocument.ace2004 = true;
      AceDocument.ace2005 = true;
      break;
    default:
      System.err.println("Invalid year: must be 2002-2005");
      System.exit(1);
  }

  // Both directory names are stored with a trailing slash.
  final String aceDirArg = args[1];
  ACEdir = aceDirArg.endsWith("/") ? aceDirArg : aceDirArg + "/";
  final String outputDirArg = args[2];
  outputDir = outputDirArg.endsWith("/") ? outputDirArg : outputDirArg + "/";

  showTypes = (argCount == 5);
  final String docListPath = args[3];
  processFileList(docListPath);
}
/**
 * Analyzes the command-line arguments to APFtoXML and stores them in this class's static
 * settings.
 *
 * <p>Expected layout:
 *
 * <pre>
 *   year apf-directory output-directory filelist apf-extension output-extension
 *       [gazetteer pre-dictionary] flag ...
 * </pre>
 *
 * <p>The gazetteer and pre-dictionary arguments are read (and loaded) only when year is 2004. At
 * least one flag must follow the positional arguments; every remaining argument is passed to
 * {@link #setFlag(String)}. Both directory names are stored with a trailing slash.
 *
 * <p>An empty argument list, too few arguments, an unrecognized year, or an invalid flag prints
 * a message to standard error and exits with status 1.
 *
 * @param args the command-line arguments described above
 * @throws IOException presumably raised while loading the gazetteer or pre-dictionary -- confirm
 *     against those loaders
 */
public static void init(String[] args) throws IOException {
  if (args.length == 0) argErr();
  JetTest.encoding = "UTF-8";
  year = args[0];
  AceDocument.ace2004 = false;
  AceDocument.ace2005 = false;
  int requiredArgs = 6;
  if (year.equals("2002")) {
    // no year-specific settings
  } else if (year.equals("2003")) {
    // no year-specific settings
  } else if (year.equals("2004")) {
    requiredArgs = 8;
    // Validate the argument count BEFORE indexing args[6] / args[7]; otherwise a short command
    // line fails with ArrayIndexOutOfBoundsException instead of the usage message, or loads
    // the resources only to be rejected afterwards.
    if (args.length <= requiredArgs) argErr();
    String gazFile = args[6];
    String preDict = args[7];
    gazetteer = new Gazetteer();
    gazetteer.load(gazFile);
    loadPreDict(preDict);
    AceDocument.ace2004 = true;
  } else if (year.equals("2005")) {
    AceDocument.ace2004 = true;
    AceDocument.ace2005 = true;
  } else {
    System.err.println("Invalid year: must be 2002-2005");
    System.exit(1);
  }
  // "<=" rather than "<": at least one flag is required after the positional arguments.
  if (args.length <= requiredArgs) argErr();
  ACEdir = args[1];
  if (!ACEdir.endsWith("/")) ACEdir += "/";
  outputDir = args[2];
  if (!outputDir.endsWith("/")) outputDir += "/";
  fileList = args[3];
  apfExtension = args[4];
  outputExtension = args[5];
  // Everything after the positional arguments is a flag.
  for (int i = requiredArgs; i < args.length; i++) setFlag(args[i]);
}
/** Prints the APFtoXML usage summary to standard error, then exits with status 1. */
private static void argErr() {
  final String[] usageLines = {
    "APFtoXML arguments:",
    " year apf-directory output-directory filelist apf-extension "
        + "output-extension [gazetteer pre-dictionary] flag ...",
    "gazetteer and pre-dictionary needed for year = 2004",
    "possible flags: sentences timex mentions extents types names"
  };
  for (String usageLine : usageLines) {
    System.err.println(usageLine);
  }
  System.exit(1);
}
/**
 * Records an output flag. The accepted flags are {@code sentences}, {@code timex}, {@code
 * mentions}, {@code extents}, {@code types} and {@code names}; an accepted flag is added to
 * {@code flags}. Any other value prints an error to standard error and exits with status 1.
 *
 * @param flag the flag name taken from the command line
 */
public static void setFlag(String flag) {
  switch (flag) {
    case "sentences":
    case "timex":
    case "mentions":
    case "extents":
    case "types":
    case "names":
      flags.add(flag);
      break;
    default:
      System.err.println("APFtoXML: invalid flag");
      System.err.println("possible flags: sentences timex mentions extents types names");
      System.exit(1);
  }
}
/** * process a set of documents through Jet in accordance with a Jet parameter file. Invoked by <br> * ProcessDocuments propsFile docList inputDir inputSuffix outputDir outputSuffix * * @param propsFile Jet properties file * @param docList file containing list of documents to be processed, 1 per line * @param inputDir directory containing files to be processed * @param inputSuffix file extension to be added to document name to obtain name of input file * @param outputDir directory containing output files * @param outputSuffix file extension to be added to document name to obtain name of output file */ public static void main(String[] args) throws IOException { if (args.length != 6) { System.err.println("ProcessDocuments requires 6 arguments:"); System.err.println(" propsFile docList inputDir inputSuffix outputDir outputSuffix"); System.exit(1); } String propsFile = args[0]; String docList = args[1]; String inputDir = args[2]; String inputSuffix = args[3]; String outputDir = args[4]; String outputSuffix = args[5]; // initialize Jet System.out.println("Starting ACE Jet..."); JetTest.initializeFromConfig(propsFile); // load ACE type dictionary EDTtype.readTypeDict(); // turn off traces Pat.trace = false; Resolve.trace = false; // ACE mode (provides additional antecedents ...) Resolve.ACE = true; String docName; int docCount = 0; BufferedReader docListReader = new BufferedReader(new FileReader(docList)); while ((docName = docListReader.readLine()) != null) { docCount++; String inputFile = docName + "." + inputSuffix; ExternalDocument doc = new ExternalDocument("sgml", inputDir, inputFile); doc.setAllTags(true); doc.open(); String[] types = doc.getAnnotationTypes(); doc.setSGMLwrapMargin(0); String outputFile = docName + "." 
+ outputSuffix; BufferedWriter writer = new BufferedWriter(new FileWriter(new File(outputDir, outputFile))); // process document Ace.monocase = Ace.allLowerCase(doc); Control.processDocument(doc, writer, docCount == -1, docCount); writer.close(); } }
public static void main(String[] args) throws IOException, ClassNotFoundException { // initialize Jet if (args.length < 2) { System.err.println("usage: java EntityFinder propfile datapath files ..."); System.exit(2); } JetTest.initializeFromConfig(args[0], args[1]); Pat.trace = false; Resolve.trace = false; String script = JetTest.config.getProperty("processDocument"); if (script == null || script.length() == 0) { Console.println("*** System error: no processDocument script."); return; } for (int i = 2; i < args.length; i++) { processFile(args[i], System.out); } }