예제 #1
0
  /**
   * Converts a plain-text sentence file into SGML documents, processes each one and writes it to
   * {@code out}.
   *
   * <p>The input is read line by line using {@code JetTest.encoding}:
   *
   * <ul>
   *   <li>an empty line or end of file terminates the current document (if anything was
   *       accumulated for it);
   *   <li>a line starting with {@code #} is a comment and is copied into the document unchanged;
   *   <li>any other line is one sentence. It is split on single spaces and every non-empty piece is
   *       wrapped in {@code <token>} tags inside a {@code <sentence>} element. Only the first
   *       {@code MaxProcessSentences} sentences of a document are converted; the rest are counted
   *       but dropped.
   * </ul>
   *
   * @param fname path of the input file
   * @param out stream the processed documents are written to; flushed after every document
   * @throws IOException if the file cannot be opened or read
   */
  static void processFile(String fname, PrintStream out) throws IOException {
    System.err.println("Processing: " + fname);

    // try-with-resources guarantees the whole stream chain is closed even when reading or
    // document processing throws; resources are closed in reverse order (fp, fread, fio).
    try (FileInputStream fio = new FileInputStream(new File(fname));
        InputStreamReader fread = new InputStreamReader(fio, JetTest.encoding);
        BufferedReader fp = new BufferedReader(fread)) {
      // Accumulates the SGML text of the document currently being read.
      StringBuilder buf = new StringBuilder();

      int docno = 0, allsents = 0, processedsents = 0;
      while (true) {
        String line = fp.readLine();

        // EOF or an empty line: the end of a Document.
        if (line == null || line.equals("")) {
          if (0 < buf.length()) {
            SGMLProcessor.allTags = true;
            Document doc = SGMLProcessor.sgmlToDoc(buf.toString(), (String[]) null);
            doc.setSGMLwrapMargin(0);
            System.err.println(
                "Doc-" + docno + ": sents=" + allsents + ", processed=" + processedsents);
            processDoc1(doc, docno);
            writeDoc1(doc, out);
            out.flush();
            // Reset the per-document state before reading the next document.
            buf.setLength(0);
            docno++;
            allsents = 0;
            processedsents = 0;
          }
          if (line == null) {
            break;
          }
          continue;
        }

        if (line.startsWith("#")) {
          // "#" indicates a comment line.
          buf.append(line).append("\n");
        } else {
          allsents++;
          if (processedsents < MaxProcessSentences) {
            buf.append("<sentence>");
            // Consecutive spaces yield empty pieces, which are skipped.
            for (String word : line.split(" ")) {
              if (0 != word.length()) {
                // The space before the closing tag is kept inside the token element.
                buf.append("<token>").append(word).append(" </token>");
              }
            }
            buf.append("</sentence>\n");
            processedsents++;
          }
        }
      }
    }
  }
예제 #2
0
파일: APFtoXML.java 프로젝트: rgrishman/jet
 /**
  * Reads a list of document names, one per line, and hands each line to
  * {@code processFileAndCatchError}.
  *
  * @param fileList path of the file containing the list of documents
  * @throws IOException if the list file cannot be opened or read
  */
 private static void processFileList(String fileList) throws IOException {
   // try-with-resources closes the list reader even if reading or processing throws.
   try (BufferedReader reader = new BufferedReader(new FileReader(fileList))) {
     String currentDoc;
     while ((currentDoc = reader.readLine()) != null) {
       processFileAndCatchError(currentDoc);
     }
   }
 }
예제 #3
0
파일: APFtoXML.java 프로젝트: rgrishman/jet
 /**
  * Loads the dictionary file into {@code preDict}.
  *
  * <p>For each line, the first character is stored as the value and everything from the third
  * character onward is the key; the second character is skipped (presumably a separator —
  * TODO confirm against the dictionary file format). Lines shorter than two characters, such as
  * blank lines, cannot hold an entry and are ignored.
  *
  * <p>An I/O failure is reported on {@code System.err} and not propagated; entries read before
  * the failure remain in {@code preDict}.
  *
  * @param dictFile path of the dictionary file
  */
 private static void loadPreDict(String dictFile) {
   // try-with-resources closes the reader on every path, including read failures.
   try (BufferedReader reader = new BufferedReader(new FileReader(dictFile))) {
     String line;
     while ((line = reader.readLine()) != null) {
       // substring(2) would throw StringIndexOutOfBoundsException on shorter lines.
       if (line.length() < 2) {
         continue;
       }
       String preType = line.substring(0, 1);
       String word = line.substring(2);
       preDict.put(word, preType);
     }
   } catch (IOException e) {
     System.err.print("Unable to load dictionary due to exception: ");
     System.err.println(e);
   }
 }
예제 #4
0
  /**
   * Processes a set of documents through Jet in accordance with a Jet parameter file. Invoked by
   * <br>
   * ProcessDocuments propsFile docList inputDir inputSuffix outputDir outputSuffix
   *
   * <p>For every document name read from the document list, the input file
   * {@code inputDir/<name>.<inputSuffix>} is opened and the result of processing is written to
   * {@code outputDir/<name>.<outputSuffix>}. The "." between name and suffix is added here, so
   * the suffixes are given without a leading dot.
   *
   * @param args exactly six command-line arguments, in this order:
   *     <ol>
   *       <li>propsFile: Jet properties file
   *       <li>docList: file containing list of documents to be processed, 1 per line
   *       <li>inputDir: directory containing files to be processed
   *       <li>inputSuffix: file extension to be added to document name to obtain name of input
   *           file
   *       <li>outputDir: directory containing output files
   *       <li>outputSuffix: file extension to be added to document name to obtain name of output
   *           file
   *     </ol>
   *     With any other argument count a usage message is printed and the JVM exits with status 1.
   * @throws IOException if the document list or an output file cannot be opened, read or written
   */
  public static void main(String[] args) throws IOException {

    if (args.length != 6) {
      System.err.println("ProcessDocuments requires 6 arguments:");
      System.err.println("  propsFile docList inputDir inputSuffix outputDir outputSuffix");
      System.exit(1);
    }
    String propsFile = args[0];
    String docList = args[1];
    String inputDir = args[2];
    String inputSuffix = args[3];
    String outputDir = args[4];
    String outputSuffix = args[5];

    // initialize Jet

    System.out.println("Starting ACE Jet...");
    JetTest.initializeFromConfig(propsFile);
    // load ACE type dictionary
    EDTtype.readTypeDict();
    // turn off traces
    Pat.trace = false;
    Resolve.trace = false;
    // ACE mode (provides additional antecedents ...)
    Resolve.ACE = true;

    int docCount = 0;
    // try-with-resources closes the list reader even if a document fails to process.
    try (BufferedReader docListReader = new BufferedReader(new FileReader(docList))) {
      String docName;
      while ((docName = docListReader.readLine()) != null) {
        docCount++;
        String inputFile = docName + "." + inputSuffix;
        ExternalDocument doc = new ExternalDocument("sgml", inputDir, inputFile);
        doc.setAllTags(true);
        doc.open();
        // NOTE(review): the result was never used; the call is retained in case it has side
        // effects on the document — confirm and remove if it is a pure getter.
        doc.getAnnotationTypes();
        doc.setSGMLwrapMargin(0);
        String outputFile = docName + "." + outputSuffix;
        // The writer is scoped to one document so it is closed even if processing throws.
        try (BufferedWriter writer =
            new BufferedWriter(new FileWriter(new File(outputDir, outputFile)))) {
          // process document
          Ace.monocase = Ace.allLowerCase(doc);
          // docCount is at least 1 here, so the third argument is always false.
          Control.processDocument(doc, writer, docCount == -1, docCount);
        }
      }
    }
  }
예제 #5
0
 /**
  * Processes every document named in a list file, one name per line.
  *
  * <p>For each name, the text file {@code ACEdir + name + ".sgm"} is opened as an SGML document,
  * the APF file {@code ACEdir + name + apfExtension} is loaded alongside it, mention tags are
  * added via {@code addMentionTags}, and the document is saved in {@code outputDir} as
  * {@code name + ".co.txt"}. A progress line is printed to {@code System.out} per document.
  *
  * @param fileList path of the file containing the list of document names
  * @throws IOException if the list file cannot be opened or read
  */
 private static void processFileList(String fileList) throws IOException {
   // try-with-resources closes the list reader even if a document fails to process.
   try (BufferedReader reader = new BufferedReader(new FileReader(fileList))) {
     int docCount = 0;
     String currentDoc;
     while ((currentDoc = reader.readLine()) != null) {
       // process file 'currentDoc'
       docCount++;
       System.out.println("\nProcessing document " + docCount + ": " + currentDoc);
       String textFileName = ACEdir + currentDoc + ".sgm";
       ExternalDocument doc = new ExternalDocument("sgml", textFileName);
       doc.setAllTags(true);
       doc.open();
       String APFfileName = ACEdir + currentDoc + apfExtension;
       AceDocument aceDoc = new AceDocument(textFileName, APFfileName);
       addMentionTags(doc, aceDoc);
       doc.setSGMLwrapMargin(0);
       doc.saveAs(outputDir, currentDoc + ".co.txt");
     }
   }
 }