Exemple #1
0
  /**
   * Reads the text file {@code fname}, turns every group of lines delimited by an empty line (or
   * by end of file) into one SGML document, and for each document calls {@code processDoc1} and
   * {@code writeDoc1}, flushing {@code out} afterwards.
   *
   * <p>Input line handling:
   * <ul>
   *   <li>a line starting with "#" is treated as a comment and copied into the document text
   *       unchanged;</li>
   *   <li>any other non-empty line counts as a sentence; it is split on single spaces and each
   *       non-empty piece is wrapped in {@code <token>} tags inside a {@code <sentence>} element.
   *       Only the first {@code MaxProcessSentences} sentences of a document are converted;
   *       later ones are counted but left out of the document text.</li>
   * </ul>
   *
   * <p>Progress information is printed to {@code System.err}. The input file is closed even when
   * reading or processing fails.
   *
   * @param fname path of the input file, decoded using {@code JetTest.encoding}
   * @param out stream that receives the written documents
   * @throws IOException if the file cannot be opened or read
   */
  static void processFile(String fname, PrintStream out) throws IOException {
    System.err.println("Processing: " + fname);

    FileInputStream fio = new FileInputStream(new File(fname));
    try {
      BufferedReader fp = new BufferedReader(new InputStreamReader(fio, JetTest.encoding));
      StringBuffer buf = new StringBuffer();

      int docno = 0, allsents = 0, processedsents = 0;
      while (true) {
        String line = fp.readLine();

        // EOF or an empty line: the end of a Document.
        if (line == null || line.equals("")) {
          if (0 < buf.length()) {
            SGMLProcessor.allTags = true;
            Document doc = SGMLProcessor.sgmlToDoc(buf.toString(), (String[]) null);
            doc.setSGMLwrapMargin(0);
            System.err.println(
                "Doc-" + docno + ": sents=" + allsents + ", processed=" + processedsents);
            processDoc1(doc, docno);
            writeDoc1(doc, out);
            out.flush();
            // Start a fresh buffer and reset the per-document counters.
            buf = new StringBuffer();
            docno++;
            allsents = 0;
            processedsents = 0;
          }
          if (line == null) {
            break;
          }
          continue;
        }

        if (line.startsWith("#")) {
          // "#" indicates a comment line; keep it verbatim in the document text.
          buf.append(line).append("\n");
        } else {
          allsents++;
          // Sentences beyond the limit are counted but not added to the document.
          if (processedsents < MaxProcessSentences) {
            buf.append("<sentence>");
            String[] words = line.split(" ");
            for (int i = 0; i < words.length; i++) {
              // Consecutive spaces produce empty pieces; skip them.
              if (0 != words[i].length()) {
                buf.append("<token>").append(words[i]).append(" </token>");
              }
            }
            buf.append("</sentence>\n");
            processedsents++;
          }
        }
      }
    } finally {
      // Closing the underlying stream releases the file handle on every exit path, including
      // failures while constructing the readers or while processing a document.
      fio.close();
    }
  }
Exemple #2
0
 /**
  * Prints the SGML form of {@code doc}, as returned by {@code doc.writeSGML(null)}, to
  * {@code out} followed by a line terminator, then flushes the stream.
  *
  * @param doc document to serialize
  * @param out destination stream
  * @throws IOException declared by the signature; no visible statement here throws it directly
  */
 static void writeDocRaw(Document doc, PrintStream out) throws IOException {
   // Convert to a String first so println receives the text rather than the raw object.
   String sgmlText = doc.writeSGML(null).toString();
   out.println(sgmlText);
   out.flush();
 }