예제 #1
0
  /**
   * Converts a plain-text file into SGML documents and runs each one through
   * {@code processDoc1} / {@code writeDoc1}.
   *
   * <p>The input is read line by line using {@code JetTest.encoding}. An empty line or
   * end-of-file terminates the current document. Lines starting with {@code "#"} are
   * copied into the document text unchanged. Every other line is counted as a sentence,
   * split on single spaces, and emitted as
   * {@code <sentence><token>w </token>...</sentence>}; only the first
   * {@code MaxProcessSentences} sentences of each document are converted, the rest are
   * counted but dropped.
   *
   * @param fname path of the input file
   * @param out stream handed to {@code writeDoc1}; flushed after every document
   * @throws IOException if the file cannot be opened or read
   */
  static void processFile(String fname, PrintStream out) throws IOException {
    System.err.println("Processing: " + fname);

    FileInputStream fio = new FileInputStream(new File(fname));
    try {
      BufferedReader fp = new BufferedReader(new InputStreamReader(fio, JetTest.encoding));
      StringBuilder buf = new StringBuilder();

      int docno = 0, allsents = 0, processedsents = 0;
      while (true) {
        String line = fp.readLine();

        // EOF or an empty line: the end of a Document.
        if (line == null || line.equals("")) {
          if (0 < buf.length()) {
            SGMLProcessor.allTags = true;
            Document doc = SGMLProcessor.sgmlToDoc(buf.toString(), (String[]) null);
            doc.setSGMLwrapMargin(0);
            System.err.println(
                "Doc-" + docno + ": sents=" + allsents + ", processed=" + processedsents);
            processDoc1(doc, docno);
            writeDoc1(doc, out);
            out.flush();
            // Reset the per-document state before reading the next document.
            buf.setLength(0);
            docno++;
            allsents = 0;
            processedsents = 0;
          }
          if (line == null) {
            break;
          }
          continue;
        }

        if (line.startsWith("#")) {
          // "#" indicates a comment line.
          buf.append(line).append("\n");
        } else {
          allsents++;
          if (processedsents < MaxProcessSentences) {
            buf.append("<sentence>");
            String[] words = line.split(" ");
            for (int i = 0; i < words.length; i++) {
              // Consecutive spaces produce empty strings; skip them.
              if (0 != words[i].length()) {
                buf.append("<token>").append(words[i]).append(" </token>");
              }
            }
            buf.append("</sentence>\n");
            processedsents++;
          }
        }
      }
    } finally {
      // Closing the underlying stream releases the file handle even when reading or
      // document processing throws; the wrapping readers hold no other resources.
      fio.close();
    }
  }
예제 #2
0
파일: APFtoXML.java 프로젝트: rgrishman/jet
 /**
  * Reads {@code fileList} line by line and passes each line to
  * {@code processFileAndCatchError}.
  *
  * @param fileList path of a text file whose lines are handed on one at a time
  * @throws IOException if the list file cannot be opened or read
  */
 private static void processFileList(String fileList) throws IOException {
   // open list of files
   BufferedReader reader = new BufferedReader(new FileReader(fileList));
   try {
     String currentDoc;
     while ((currentDoc = reader.readLine()) != null) {
       processFileAndCatchError(currentDoc);
     }
   } finally {
     // Release the file handle even if reading or processing throws.
     reader.close();
   }
 }
예제 #3
0
파일: APFtoXML.java 프로젝트: rgrishman/jet
 /**
  * Loads entries from {@code dictFile} into {@code preDict}.
  *
  * <p>For each line, the first character is stored as the value and the text from
  * index 2 onward as the key (the character at index 1 is skipped). Lines shorter than
  * two characters, such as blank lines, cannot hold an entry and are ignored. An
  * {@link IOException} is reported on {@code System.err} and not propagated.
  *
  * @param dictFile path of the dictionary file
  */
 private static void loadPreDict(String dictFile) {
   try {
     BufferedReader reader = new BufferedReader(new FileReader(dictFile));
     try {
       String line;
       while ((line = reader.readLine()) != null) {
         if (line.length() < 2) {
           // substring(2) would throw StringIndexOutOfBoundsException here.
           continue;
         }
         String preType = line.substring(0, 1);
         String word = line.substring(2);
         preDict.put(word, preType);
       }
     } finally {
       // Release the file handle even if reading fails part-way through.
       reader.close();
     }
   } catch (IOException e) {
     System.err.print("Unable to load dictionary due to exception: ");
     System.err.println(e);
   }
 }
예제 #4
0
 /**
  * Processes every document named in {@code fileList}.
  *
  * <p>Each line of the list is a document name. For each one, the text file
  * {@code ACEdir + name + ".sgm"} is opened as an SGML {@code ExternalDocument}, an
  * {@code AceDocument} is built from it and {@code ACEdir + name + apfExtension},
  * {@code addMentionTags} is applied, and the result is saved to {@code outputDir} as
  * {@code name + ".co.txt"}.
  *
  * @param fileList path of a text file with one document name per line
  * @throws IOException if the list file cannot be opened or read
  */
 private static void processFileList(String fileList) throws IOException {
   // open list of files
   BufferedReader reader = new BufferedReader(new FileReader(fileList));
   try {
     int docCount = 0;
     String currentDoc;
     while ((currentDoc = reader.readLine()) != null) {
       // process file 'currentDoc'
       docCount++;
       System.out.println("\nProcessing document " + docCount + ": " + currentDoc);
       String textFileName = ACEdir + currentDoc + ".sgm";
       ExternalDocument doc = new ExternalDocument("sgml", textFileName);
       doc.setAllTags(true);
       doc.open();
       String APFfileName = ACEdir + currentDoc + apfExtension;
       AceDocument aceDoc = new AceDocument(textFileName, APFfileName);
       addMentionTags(doc, aceDoc);
       doc.setSGMLwrapMargin(0);
       doc.saveAs(outputDir, currentDoc + ".co.txt");
     }
   } finally {
     // Release the list file even if a document fails to process.
     reader.close();
   }
 }