Exemple #1
0
 /**
  * Command-line entry point of APFtoCorefXML: converts the documents named in a file list from
  * APF to XML with in-line coreference markup. Arguments, in order:
  *
  * <ol>
  *   <li>year: one of 2002, 2003, 2004, 2005. It selects the APF file extension
  *       (".sgm.tmx.rdc.xml" for 2002, ".apf.xml" otherwise) and the {@code AceDocument.ace2004}
  *       / {@code AceDocument.ace2005} switches.
  *   <li>apf-directory: stored in {@code ACEdir}; a trailing "/" is appended when missing.
  *   <li>output-directory: stored in {@code outputDir}; a trailing "/" is appended when missing.
  *   <li>filelist: handed to {@code processFileList}.
  *   <li>showTypes (optional): the mere presence of a fifth argument turns {@code showTypes} on;
  *       its value is not inspected.
  * </ol>
  *
  * <p>A wrong argument count or an unrecognized year prints a message on standard error and
  * terminates the JVM with status 1.
  */
 public static void main(String[] args) throws IOException {
   int argCount = args.length;
   if (argCount != 4 && argCount != 5) {
     System.err.println("APFtoCorefXML requires 4 or 5 arguments:");
     System.err.println("  year apf-directory  output-directory  filelist [showTypes]");
     System.exit(1);
   }

   // Derive every year-dependent setting from the year string in one place.
   String year = args[0];
   boolean is2002 = year.equals("2002");
   boolean is2003 = year.equals("2003");
   boolean is2004 = year.equals("2004");
   boolean is2005 = year.equals("2005");
   apfExtension = is2002 ? ".sgm.tmx.rdc.xml" : ".apf.xml";
   // 2005 data turns on both switches; 2004 data only the first.
   AceDocument.ace2004 = is2004 || is2005;
   AceDocument.ace2005 = is2005;
   if (!(is2002 || is2003 || is2004 || is2005)) {
     System.err.println("Invalid year:  must be 2002-2005");
     System.exit(1);
   }

   // Directory arguments are normalized so they always end with a slash.
   String inputDirArg = args[1];
   ACEdir = inputDirArg.endsWith("/") ? inputDirArg : inputDirArg + "/";
   String outputDirArg = args[2];
   outputDir = outputDirArg.endsWith("/") ? outputDirArg : outputDirArg + "/";

   showTypes = (argCount == 5);
   processFileList(args[3]);
 }
Exemple #2
0
 /**
  * Analyzes the command-line arguments to APFtoXML and stores them in static configuration.
  *
  * <p>Positional arguments: {@code year apf-directory output-directory filelist apf-extension
  * output-extension}. For year 2004 these are followed by two more, the gazetteer file and the
  * pre-dictionary file. Every remaining argument is treated as a flag and passed to {@link
  * #setFlag}; at least one flag must be present.
  *
  * <p>If there are too few arguments, the usage message is printed via {@code argErr()} (which
  * exits). The count is checked before any year-specific argument is read, so a short 2004
  * command line yields the usage message rather than an {@code
  * ArrayIndexOutOfBoundsException}. An unrecognized year prints an error and exits with status
  * 1.
  *
  * @param args the command-line arguments
  * @throws IOException presumably propagated from loading the gazetteer or the pre-dictionary
  *     (the loaders are defined elsewhere)
  */
 public static void init(String[] args) throws IOException {
   if (args.length == 0) argErr();
   JetTest.encoding = "UTF-8";
   year = args[0];
   AceDocument.ace2004 = false;
   AceDocument.ace2005 = false;
   int requiredArgs = 6;
   if (year.equals("2002") || year.equals("2003")) {
     // no year-specific setup: six positional arguments, both ACE switches stay off
   } else if (year.equals("2004")) {
     requiredArgs = 8;
     // Validate the count BEFORE reading args[6] / args[7]; otherwise a short command line
     // fails with ArrayIndexOutOfBoundsException instead of the usage message.
     if (args.length <= requiredArgs) argErr();
     String gazFile = args[6];
     String preDict = args[7];
     gazetteer = new Gazetteer();
     gazetteer.load(gazFile);
     loadPreDict(preDict);
     AceDocument.ace2004 = true;
   } else if (year.equals("2005")) {
     AceDocument.ace2004 = true;
     AceDocument.ace2005 = true;
   } else {
     System.err.println("Invalid year:  must be 2002-2005");
     System.exit(1);
   }
   // "<=" rather than "<": at least one flag must follow the positional arguments.
   if (args.length <= requiredArgs) argErr();
   ACEdir = args[1];
   if (!ACEdir.endsWith("/")) ACEdir += "/";
   outputDir = args[2];
   if (!outputDir.endsWith("/")) outputDir += "/";
   fileList = args[3];
   apfExtension = args[4];
   outputExtension = args[5];
   // Everything after the positional arguments is a flag; setFlag rejects unknown ones.
   for (int i = requiredArgs; i < args.length; i++) setFlag(args[i]);
 }
Exemple #3
0
 /**
  * Prints the APFtoXML usage summary (argument layout, the year-2004 extras, and the accepted
  * flags) on standard error, then terminates the JVM with exit status 1.
  */
 private static void argErr() {
   String[] usageLines = {
     "APFtoXML arguments:",
     "  year apf-directory  output-directory  filelist apf-extension "
         + "output-extension [gazetteer pre-dictionary] flag ...",
     "gazetteer and pre-dictionary needed for year = 2004",
     "possible flags:  sentences timex mentions extents types names"
   };
   for (String usageLine : usageLines) {
     System.err.println(usageLine);
   }
   System.exit(1);
 }
Exemple #4
0
 /**
  * Records one output flag. The flag must be exactly one of "sentences", "timex", "mentions",
  * "extents", "types" or "names"; a recognized flag is added to {@code flags}. Any other value
  * prints an error plus the list of legal flags on standard error and terminates the JVM with
  * exit status 1.
  *
  * @param flag the flag name taken from the command line
  */
 public static void setFlag(String flag) {
   String[] legalFlags = {"sentences", "timex", "mentions", "extents", "types", "names"};
   boolean recognized = false;
   for (int k = 0; k < legalFlags.length && !recognized; k++) {
     recognized = flag.equals(legalFlags[k]);
   }
   if (!recognized) {
     System.err.println("APFtoXML:  invalid flag");
     System.err.println("possible flags:  sentences timex mentions extents types names");
     System.exit(1);
   } else {
     flags.add(flag);
   }
 }
Exemple #5
0
  /**
   * Command-line entry point (the usage text names it EntityFinder). The first two arguments,
   * a property file and a data path, are used to initialize Jet; every further argument is
   * passed to {@code processFile} together with {@code System.out}.
   *
   * <p>With fewer than two arguments the usage line is printed on standard error and the JVM
   * exits with status 2. If the configuration has no non-empty "processDocument" property, an
   * error is reported on the Jet console and the method returns without processing any file.
   */
  public static void main(String[] args) throws IOException, ClassNotFoundException {
    if (args.length < 2) {
      System.err.println("usage: java EntityFinder propfile datapath files ...");
      System.exit(2);
    }

    // Bring up Jet from the property file and data path, with tracing switched off.
    String propertyFile = args[0];
    String dataPath = args[1];
    JetTest.initializeFromConfig(propertyFile, dataPath);
    Pat.trace = false;
    Resolve.trace = false;

    // The configuration must define a non-empty processDocument script.
    String script = JetTest.config.getProperty("processDocument");
    boolean scriptDefined = script != null && script.length() != 0;
    if (!scriptDefined) {
      Console.println("*** System error: no processDocument script.");
      return;
    }

    // Arguments from index 2 onward are the files to process.
    int next = 2;
    while (next < args.length) {
      processFile(args[next], System.out);
      next++;
    }
  }