Esempio n. 1
0
 /**
  * Processes a single file, 'docName', generating a file with in-line XML.
  *
  * <p>The text is taken from {@code ACEdir + docName + ".sgm"} and the APF annotations from
  * {@code ACEdir + docName + "." + apfExtension}; both are passed to {@code addAnnotations}. The
  * annotated document is then saved in {@code outputDir} under the name
  * {@code docName + "." + outputExtension}. Each call increments the shared {@code docCount} and
  * prints a progress line to standard output.
  *
  * @param docName name of the document; directory and extensions are added by this method
  */
 public static void processFile(String docName) {
   ++docCount;
   System.out.println("\nProcessing document " + docCount + ": " + docName);

   // the text file and the APF file share the same directory and base name
   String basePath = ACEdir + docName;
   String textFileName = basePath + ".sgm";

   ExternalDocument doc = new ExternalDocument("sgml", textFileName);
   doc.setAllTags(true);
   // for the 2003 and 2004 data, TURN is declared as an empty tag before the document is opened
   boolean turnIsEmptyTag = year.equals("2003") || year.equals("2004");
   if (turnIsEmptyTag) {
     doc.setEmptyTags(new String[] {"TURN"});
   }
   doc.open();

   String APFfileName = basePath + "." + apfExtension;
   AceDocument aceDoc = new AceDocument(textFileName, APFfileName);
   addAnnotations(doc, aceDoc);

   // write the result with the SGML wrap margin set to 0
   doc.setSGMLwrapMargin(0);
   String outputFileName = docName + "." + outputExtension;
   doc.saveAs(outputDir, outputFileName);
 }
Esempio n. 2
0
  /**
   * Processes a set of documents through Jet in accordance with a Jet parameter file. Invoked by
   * <br>
   * ProcessDocuments propsFile docList inputDir inputSuffix outputDir outputSuffix
   *
   * <p>The six command-line arguments, in order, are:
   *
   * <ul>
   *   <li>propsFile: Jet properties file
   *   <li>docList: file containing list of documents to be processed, 1 per line
   *   <li>inputDir: directory containing files to be processed
   *   <li>inputSuffix: file extension to be added to document name to obtain name of input file
   *   <li>outputDir: directory containing output files
   *   <li>outputSuffix: file extension to be added to document name to obtain name of output file
   * </ul>
   *
   * <p>If the number of arguments is not 6, a usage message is printed to standard error and the
   * JVM exits with status 1. Blank lines in the document list are skipped and not counted.
   *
   * @param args the six command-line arguments described above
   * @throws IOException if the document list cannot be read or an output file cannot be written
   */
  public static void main(String[] args) throws IOException {

    if (args.length != 6) {
      System.err.println("ProcessDocuments requires 6 arguments:");
      System.err.println("  propsFile docList inputDir inputSuffix outputDir outputSuffix");
      System.exit(1);
    }
    String propsFile = args[0];
    String docList = args[1];
    String inputDir = args[2];
    String inputSuffix = args[3];
    String outputDir = args[4];
    String outputSuffix = args[5];

    // initialize Jet

    System.out.println("Starting ACE Jet...");
    JetTest.initializeFromConfig(propsFile);
    // load ACE type dictionary
    EDTtype.readTypeDict();
    // turn off traces
    Pat.trace = false;
    Resolve.trace = false;
    // ACE mode (provides additional antecedents ...)
    Resolve.ACE = true;

    int docCount = 0;
    // try-with-resources: the list reader is closed even if processing a document throws
    try (BufferedReader docListReader = new BufferedReader(new FileReader(docList))) {
      String docName;
      while ((docName = docListReader.readLine()) != null) {
        // a blank line would otherwise be turned into the file name "." + inputSuffix
        if (docName.trim().isEmpty()) {
          continue;
        }
        docCount++;
        String inputFile = docName + "." + inputSuffix;
        ExternalDocument doc = new ExternalDocument("sgml", inputDir, inputFile);
        doc.setAllTags(true);
        doc.open();
        doc.setSGMLwrapMargin(0);
        String outputFile = docName + "." + outputSuffix;
        // the writer is closed (and flushed) per document, including when processing fails
        try (BufferedWriter writer =
            new BufferedWriter(new FileWriter(new File(outputDir, outputFile)))) {
          // process document
          Ace.monocase = Ace.allLowerCase(doc);
          // docCount is at least 1 here, so 'docCount == -1' is false for every document
          Control.processDocument(doc, writer, docCount == -1, docCount);
        }
      }
    }
  }
Esempio n. 3
0
 /**
  * Processes every document named in 'fileList', which holds one document name per line.
  *
  * <p>For each name, the text is taken from {@code ACEdir + name + ".sgm"} and the APF annotations
  * from {@code ACEdir + name + apfExtension}; both are passed to {@code addMentionTags}, and the
  * resulting document is saved in {@code outputDir} as {@code name + ".co.txt"}. A progress line is
  * printed for each document. Blank lines in the list are skipped and not counted.
  *
  * @param fileList path of the file containing the list of document names
  * @throws IOException if the list of files cannot be opened or read
  */
 private static void processFileList(String fileList) throws IOException {
   int docCount = 0;
   // open list of files; try-with-resources closes it even if a document fails part-way
   try (BufferedReader reader = new BufferedReader(new FileReader(fileList))) {
     String currentDoc;
     while ((currentDoc = reader.readLine()) != null) {
       // a blank line would otherwise be turned into the file name ACEdir + ".sgm"
       if (currentDoc.trim().isEmpty()) {
         continue;
       }
       // process file 'currentDoc'
       docCount++;
       System.out.println("\nProcessing document " + docCount + ": " + currentDoc);
       String textFileName = ACEdir + currentDoc + ".sgm";
       ExternalDocument doc = new ExternalDocument("sgml", textFileName);
       doc.setAllTags(true);
       doc.open();
       // NOTE(review): apfExtension is appended without a "." separator, so it presumably
       // carries its own leading dot -- confirm against where apfExtension is set
       String APFfileName = ACEdir + currentDoc + apfExtension;
       AceDocument aceDoc = new AceDocument(textFileName, APFfileName);
       addMentionTags(doc, aceDoc);
       doc.setSGMLwrapMargin(0);
       doc.saveAs(outputDir, currentDoc + ".co.txt");
     }
   }
 }