static void processFile(String fname, PrintStream out) throws IOException { System.err.println("Processing: " + fname); FileInputStream fio = new FileInputStream(new File(fname)); InputStreamReader fread = new InputStreamReader(fio, JetTest.encoding); BufferedReader fp = new BufferedReader(fread); StringBuffer buf = new StringBuffer(); int docno = 0, allsents = 0, processedsents = 0; while (true) { String line = fp.readLine(); // EOF or an empty line: the end of a Document. if (line == null || line.equals("")) { if (0 < buf.length()) { SGMLProcessor.allTags = true; Document doc = SGMLProcessor.sgmlToDoc(buf.toString(), (String[]) null); doc.setSGMLwrapMargin(0); System.err.println( "Doc-" + docno + ": sents=" + allsents + ", processed=" + processedsents); processDoc1(doc, docno); writeDoc1(doc, out); out.flush(); buf = new StringBuffer(); docno++; allsents = 0; processedsents = 0; } if (line == null) { break; } else { continue; } } if (line.startsWith("#")) { // "#" indicates a comment line. buf.append(line + "\n"); } else { allsents++; if (processedsents < MaxProcessSentences) { buf.append("<sentence>"); String[] words = line.split(" "); for (int i = 0; i < words.length; i++) { if (0 != words[i].length()) { buf.append("<token>" + words[i] + " </token>"); } } buf.append("</sentence>\n"); processedsents++; } } } fp.close(); fread.close(); fio.close(); return; }
private static void processFileList(String fileList) throws IOException { // open list of files BufferedReader reader = new BufferedReader(new FileReader(fileList)); String currentDoc; while ((currentDoc = reader.readLine()) != null) { processFileAndCatchError(currentDoc); } reader.close(); }
/**
 * Loads entries from a dictionary file into {@code preDict}.
 *
 * <p>Each line is read as: the first character is the type, the second character is skipped
 * (presumably a separator -- confirm against the dictionary format), and everything from the
 * third character on is the word. The word is stored as the key and the type as the value.
 *
 * <p>Lines shorter than two characters cannot be split this way and are skipped. An
 * {@link IOException} is reported on standard error and not rethrown.
 *
 * @param dictFile path of the dictionary file
 */
private static void loadPreDict(String dictFile) {
  // try-with-resources closes the file even if reading fails part-way through.
  try (BufferedReader reader = new BufferedReader(new FileReader(dictFile))) {
    String line;
    while ((line = reader.readLine()) != null) {
      // substring(0, 1) / substring(2) would throw an unchecked
      // StringIndexOutOfBoundsException on such lines and abort the whole load.
      if (line.length() < 2) {
        continue;
      }
      String preType = line.substring(0, 1);
      String word = line.substring(2);
      preDict.put(word, preType);
    }
  } catch (IOException e) {
    System.err.print("Unable to load dictionary due to exception: ");
    System.err.println(e);
  }
}
/** * process a set of documents through Jet in accordance with a Jet parameter file. Invoked by <br> * ProcessDocuments propsFile docList inputDir inputSuffix outputDir outputSuffix * * @param propsFile Jet properties file * @param docList file containing list of documents to be processed, 1 per line * @param inputDir directory containing files to be processed * @param inputSuffix file extension to be added to document name to obtain name of input file * @param outputDir directory containing output files * @param outputSuffix file extension to be added to document name to obtain name of output file */ public static void main(String[] args) throws IOException { if (args.length != 6) { System.err.println("ProcessDocuments requires 6 arguments:"); System.err.println(" propsFile docList inputDir inputSuffix outputDir outputSuffix"); System.exit(1); } String propsFile = args[0]; String docList = args[1]; String inputDir = args[2]; String inputSuffix = args[3]; String outputDir = args[4]; String outputSuffix = args[5]; // initialize Jet System.out.println("Starting ACE Jet..."); JetTest.initializeFromConfig(propsFile); // load ACE type dictionary EDTtype.readTypeDict(); // turn off traces Pat.trace = false; Resolve.trace = false; // ACE mode (provides additional antecedents ...) Resolve.ACE = true; String docName; int docCount = 0; BufferedReader docListReader = new BufferedReader(new FileReader(docList)); while ((docName = docListReader.readLine()) != null) { docCount++; String inputFile = docName + "." + inputSuffix; ExternalDocument doc = new ExternalDocument("sgml", inputDir, inputFile); doc.setAllTags(true); doc.open(); String[] types = doc.getAnnotationTypes(); doc.setSGMLwrapMargin(0); String outputFile = docName + "." 
+ outputSuffix; BufferedWriter writer = new BufferedWriter(new FileWriter(new File(outputDir, outputFile))); // process document Ace.monocase = Ace.allLowerCase(doc); Control.processDocument(doc, writer, docCount == -1, docCount); writer.close(); } }
private static void processFileList(String fileList) throws IOException { // open list of files BufferedReader reader = new BufferedReader(new FileReader(fileList)); int docCount = 0; String currentDoc; while ((currentDoc = reader.readLine()) != null) { // process file 'currentDoc' docCount++; System.out.println("\nProcessing document " + docCount + ": " + currentDoc); String textFileName = ACEdir + currentDoc + ".sgm"; ExternalDocument doc = new ExternalDocument("sgml", textFileName); doc.setAllTags(true); doc.open(); String APFfileName = ACEdir + currentDoc + apfExtension; AceDocument aceDoc = new AceDocument(textFileName, APFfileName); addMentionTags(doc, aceDoc); doc.setSGMLwrapMargin(0); doc.saveAs(outputDir, currentDoc + ".co.txt"); } reader.close(); }