Example #1
0
 /**
  * Converts every APF file named in a file list into an XML file with in-line coreference markup.
  * Command-line arguments, in order:
  *
  * <ol>
  *   <li>year: the year (2002, 2003, 2004 or 2005) the APF file was created; it selects the APF
  *       file extension and the {@code AceDocument} format flags
  *   <li>ACEdir: the directory containing the text and APF files
  *   <li>outputDir: the directory receiving the in-line coreference files
  *   <li>filelist: a file containing a list of document names
  *   <li>showTypes: (optional) when any fifth argument is present, entity type and subtype
  *       information is included with each mention
  * </ol>
  *
  * <p>A wrong argument count or an unrecognized year prints a message to standard error and exits
  * with status 1. A trailing "/" is appended to either directory when it is missing.
  */
 public static void main(String[] args) throws IOException {
   final int argCount = args.length;
   if (argCount != 4 && argCount != 5) {
     System.err.println("APFtoCorefXML requires 4 or 5 arguments:");
     System.err.println("  year apf-directory  output-directory  filelist [showTypes]");
     System.exit(1);
   }

   // Classify the year once; everything year-dependent is derived from these predicates.
   final String year = args[0];
   final boolean is2002 = year.equals("2002");
   final boolean is2003 = year.equals("2003");
   final boolean is2004 = year.equals("2004");
   final boolean is2005 = year.equals("2005");

   // Only 2002 uses a different APF extension; 2005 implies the 2004 format flag as well.
   apfExtension = is2002 ? ".sgm.tmx.rdc.xml" : ".apf.xml";
   AceDocument.ace2004 = is2004 || is2005;
   AceDocument.ace2005 = is2005;

   final boolean knownYear = is2002 || is2003 || is2004 || is2005;
   if (!knownYear) {
     System.err.println("Invalid year:  must be 2002-2005");
     System.exit(1);
   }

   // Normalize both directories so that they end with a separator.
   final String aceDirArg = args[1];
   ACEdir = aceDirArg.endsWith("/") ? aceDirArg : aceDirArg + "/";
   final String outputDirArg = args[2];
   outputDir = outputDirArg.endsWith("/") ? outputDirArg : outputDirArg + "/";

   showTypes = argCount == 5;
   processFileList(args[3]);
 }
Example #2
0
 /**
  * Analyzes the command-line arguments to APFtoXML and stores them in static fields.
  *
  * <p>Expected arguments, in order: year, apf-directory, output-directory, filelist,
  * apf-extension, output-extension, then (for year 2004 only) gazetteer and pre-dictionary,
  * followed by at least one flag accepted by {@code setFlag}.
  *
  * <p>When too few arguments are supplied, {@code argErr} prints the usage message and exits. An
  * unrecognized year prints an error and exits with status 1.
  *
  * @param args the command-line arguments described above
  * @throws IOException if thrown while loading the gazetteer or the pre-dictionary
  */
 public static void init(String[] args) throws IOException {
   if (args.length == 0) argErr();
   JetTest.encoding = "UTF-8";
   year = args[0];
   AceDocument.ace2004 = false;
   AceDocument.ace2005 = false;
   int requiredArgs = 6;
   boolean loadGazetteer = false;
   if (year.equals("2002") || year.equals("2003")) {
     // no year-specific settings
   } else if (year.equals("2004")) {
     // 2004 takes two extra positional arguments (gazetteer, pre-dictionary); they are only
     // read after the argument count has been validated below.
     requiredArgs = 8;
     loadGazetteer = true;
   } else if (year.equals("2005")) {
     AceDocument.ace2004 = true;
     AceDocument.ace2005 = true;
   } else {
     System.err.println("Invalid year:  must be 2002-2005");
     System.exit(1);
   }
   // At least one flag must follow the positional arguments.
   if (args.length <= requiredArgs) argErr();
   if (loadGazetteer) {
     // Safe now: args[6] and args[7] are guaranteed to exist, so a short command line
     // produces the usage message instead of an ArrayIndexOutOfBoundsException.
     String gazFile = args[6];
     String preDict = args[7];
     gazetteer = new Gazetteer();
     gazetteer.load(gazFile);
     loadPreDict(preDict);
     AceDocument.ace2004 = true;
   }
   ACEdir = args[1];
   if (!ACEdir.endsWith("/")) ACEdir += "/";
   outputDir = args[2];
   if (!outputDir.endsWith("/")) outputDir += "/";
   fileList = args[3];
   apfExtension = args[4];
   outputExtension = args[5];
   // Everything after the positional arguments is a flag.
   for (int i = requiredArgs; i < args.length; i++) setFlag(args[i]);
 }
Example #3
0
 /** Prints the APFtoXML usage message to standard error and exits with status 1. */
 private static void argErr() {
   final String[] usageLines = {
     "APFtoXML arguments:",
     "  year apf-directory  output-directory  filelist apf-extension "
         + "output-extension [gazetteer pre-dictionary] flag ...",
     "gazetteer and pre-dictionary needed for year = 2004",
     "possible flags:  sentences timex mentions extents types names"
   };
   for (String line : usageLines) {
     System.err.println(line);
   }
   System.exit(1);
 }
Example #4
0
 /**
  * Records one output flag. Accepted values are sentences, timex, mentions, extents, types and
  * names; any other value prints an error to standard error and exits with status 1.
  *
  * @param flag the flag name taken from the command line
  */
 public static void setFlag(String flag) {
   final String[] accepted = {"sentences", "timex", "mentions", "extents", "types", "names"};
   boolean recognized = false;
   for (int k = 0; k < accepted.length && !recognized; k++) {
     recognized = flag.equals(accepted[k]);
   }
   if (!recognized) {
     System.err.println("APFtoXML:  invalid flag");
     System.err.println("possible flags:  sentences timex mentions extents types names");
     System.exit(1);
   } else {
     flags.add(flag);
   }
 }
Example #5
0
  /**
   * Processes a set of documents through Jet in accordance with a Jet parameter file. Invoked by <br>
   * ProcessDocuments propsFile docList inputDir inputSuffix outputDir outputSuffix
   *
   * <p>The command-line arguments are:
   *
   * <ul>
   *   <li>propsFile: Jet properties file
   *   <li>docList: file containing list of documents to be processed, 1 per line
   *   <li>inputDir: directory containing files to be processed
   *   <li>inputSuffix: file extension appended (after a ".") to the document name to obtain the
   *       name of the input file
   *   <li>outputDir: directory containing output files
   *   <li>outputSuffix: file extension appended (after a ".") to the document name to obtain the
   *       name of the output file
   * </ul>
   *
   * <p>Exits with status 1 when the number of arguments is not exactly 6.
   *
   * @param args the six command-line arguments listed above
   * @throws IOException if the document list cannot be read or an output file cannot be written
   */
  public static void main(String[] args) throws IOException {

    if (args.length != 6) {
      System.err.println("ProcessDocuments requires 6 arguments:");
      System.err.println("  propsFile docList inputDir inputSuffix outputDir outputSuffix");
      System.exit(1);
    }
    String propsFile = args[0];
    String docList = args[1];
    String inputDir = args[2];
    String inputSuffix = args[3];
    String outputDir = args[4];
    String outputSuffix = args[5];

    // initialize Jet

    System.out.println("Starting ACE Jet...");
    JetTest.initializeFromConfig(propsFile);
    // load ACE type dictionary
    EDTtype.readTypeDict();
    // turn off traces
    Pat.trace = false;
    Resolve.trace = false;
    // ACE mode (provides additional antecedents ...)
    Resolve.ACE = true;

    int docCount = 0;
    // try-with-resources closes the list reader even when a document fails part-way through
    try (BufferedReader docListReader = new BufferedReader(new FileReader(docList))) {
      String docName;
      while ((docName = docListReader.readLine()) != null) {
        docCount++;
        String inputFile = docName + "." + inputSuffix;
        ExternalDocument doc = new ExternalDocument("sgml", inputDir, inputFile);
        doc.setAllTags(true);
        doc.open();
        // NOTE(review): result is unused; call kept in case it has side effects -- confirm and
        // remove if it is a plain getter.
        String[] types = doc.getAnnotationTypes();
        doc.setSGMLwrapMargin(0);
        String outputFile = docName + "." + outputSuffix;
        // the writer is closed (and flushed) whether or not processing throws
        try (BufferedWriter writer =
            new BufferedWriter(new FileWriter(new File(outputDir, outputFile)))) {
          // process document
          Ace.monocase = Ace.allLowerCase(doc);
          // docCount is at least 1 here, so the third argument is always false
          Control.processDocument(doc, writer, docCount == -1, docCount);
        }
      }
    }
  }
Example #6
0
  /**
   * Initializes Jet from a properties file and data path, then passes every remaining
   * command-line argument to {@code processFile}, writing to standard output.
   *
   * <p>Usage: {@code java EntityFinder propfile datapath files ...}. Exits with status 2 when
   * fewer than two arguments are given. Reports an error and returns without processing when the
   * configuration has no {@code processDocument} script.
   *
   * @param args propfile, datapath, then zero or more files to process
   */
  public static void main(String[] args) throws IOException, ClassNotFoundException {
    if (args.length < 2) {
      System.err.println("usage: java EntityFinder propfile datapath files ...");
      System.exit(2);
    }

    // initialize Jet with tracing switched off
    final String propertyFile = args[0];
    final String dataPath = args[1];
    JetTest.initializeFromConfig(propertyFile, dataPath);
    Pat.trace = false;
    Resolve.trace = false;

    final String script = JetTest.config.getProperty("processDocument");
    if (script != null && script.length() > 0) {
      // everything after the two fixed arguments is a file to process
      int next = 2;
      while (next < args.length) {
        processFile(args[next], System.out);
        next++;
      }
    } else {
      Console.println("*** System error: no processDocument script.");
    }
  }