static void processFile(String fname, PrintStream out) throws IOException { System.err.println("Processing: " + fname); FileInputStream fio = new FileInputStream(new File(fname)); InputStreamReader fread = new InputStreamReader(fio, JetTest.encoding); BufferedReader fp = new BufferedReader(fread); StringBuffer buf = new StringBuffer(); int docno = 0, allsents = 0, processedsents = 0; while (true) { String line = fp.readLine(); // EOF or an empty line: the end of a Document. if (line == null || line.equals("")) { if (0 < buf.length()) { SGMLProcessor.allTags = true; Document doc = SGMLProcessor.sgmlToDoc(buf.toString(), (String[]) null); doc.setSGMLwrapMargin(0); System.err.println( "Doc-" + docno + ": sents=" + allsents + ", processed=" + processedsents); processDoc1(doc, docno); writeDoc1(doc, out); out.flush(); buf = new StringBuffer(); docno++; allsents = 0; processedsents = 0; } if (line == null) { break; } else { continue; } } if (line.startsWith("#")) { // "#" indicates a comment line. buf.append(line + "\n"); } else { allsents++; if (processedsents < MaxProcessSentences) { buf.append("<sentence>"); String[] words = line.split(" "); for (int i = 0; i < words.length; i++) { if (0 != words[i].length()) { buf.append("<token>" + words[i] + " </token>"); } } buf.append("</sentence>\n"); processedsents++; } } } fp.close(); fread.close(); fio.close(); return; }
private static void processFileList(String fileList) throws IOException { // open list of files BufferedReader reader = new BufferedReader(new FileReader(fileList)); String currentDoc; while ((currentDoc = reader.readLine()) != null) { processFileAndCatchError(currentDoc); } reader.close(); }
/**
 * Loads dictionary entries from {@code dictFile} into {@code preDict}.
 *
 * <p>For each line, the first character is stored as the value and the text
 * from index 2 onward as the key; the character at index 1 is skipped
 * (presumably a separator -- confirm against the dictionary format). Lines
 * shorter than two characters, such as blank lines, cannot hold an entry and
 * are ignored.
 *
 * <p>An {@link IOException} while opening or reading the file is reported on
 * {@code System.err} and not rethrown; entries read before the failure
 * remain in {@code preDict}.
 *
 * @param dictFile path of the dictionary file
 */
private static void loadPreDict(String dictFile) {
    try {
        BufferedReader reader = new BufferedReader(new FileReader(dictFile));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                // Guard against blank or truncated lines, which would otherwise
                // make substring() throw StringIndexOutOfBoundsException.
                if (line.length() < 2) {
                    continue;
                }
                String preType = line.substring(0, 1);
                String word = line.substring(2);
                preDict.put(word, preType);
            }
        } finally {
            // Release the file even if reading fails part-way through.
            reader.close();
        }
    } catch (IOException e) {
        System.err.print("Unable to load dictionary due to exception: ");
        System.err.println(e);
    }
}
private static void processFileList(String fileList) throws IOException { // open list of files BufferedReader reader = new BufferedReader(new FileReader(fileList)); int docCount = 0; String currentDoc; while ((currentDoc = reader.readLine()) != null) { // process file 'currentDoc' docCount++; System.out.println("\nProcessing document " + docCount + ": " + currentDoc); String textFileName = ACEdir + currentDoc + ".sgm"; ExternalDocument doc = new ExternalDocument("sgml", textFileName); doc.setAllTags(true); doc.open(); String APFfileName = ACEdir + currentDoc + apfExtension; AceDocument aceDoc = new AceDocument(textFileName, APFfileName); addMentionTags(doc, aceDoc); doc.setSGMLwrapMargin(0); doc.saveAs(outputDir, currentDoc + ".co.txt"); } reader.close(); }