/**
 * Writes every String of the given collection to the underlying writer, delegating the
 * actual output to {@code IOs.writeLines}.
 *
 * <p>Unless this instance was created in keep-open mode, the writer is closed once the call
 * finishes, whether it succeeded or failed.
 *
 * @param lines lines to write; null entries produce blank lines
 * @return this instance, to allow chaining. Note that if the instance was not constructed
 *     with keepOpen, it is already closed on return and chaining further write methods
 *     will throw an exception.
 * @throws java.io.IOException if an I/O error occurs
 */
public SimpleTextWriter writeLines(Collection<String> lines) throws IOException {
  try {
    IOs.writeLines(lines, writer);
  } finally {
    // One-shot writers release the underlying resource after a single write call.
    if (!keepOpen) {
      close();
    }
  }
  return this;
}
/**
 * Writes the {@code toString()} value of every item of the given collection to the
 * underlying writer, delegating the actual output to {@code IOs.writeToStringLines}.
 *
 * <p>Unless this instance was created in keep-open mode, the writer is closed once the call
 * finishes, whether it succeeded or failed.
 *
 * @param objects items to write; null entries produce blank lines
 * @return this instance, to allow chaining. Note that if the instance was not constructed
 *     with keepOpen, it is already closed on return and chaining further write methods
 *     will throw an exception.
 * @throws java.io.IOException if an I/O error occurs
 */
public SimpleTextWriter writeToStringLines(Collection<?> objects) throws IOException {
  try {
    IOs.writeToStringLines(objects, writer);
  } finally {
    // One-shot writers release the underlying resource after a single write call.
    if (!keepOpen) {
      close();
    }
  }
  return this;
}
/**
 * Writes a single String to the underlying writer. A null or empty String writes nothing.
 *
 * <p>Unless this instance was created in keep-open mode, the writer is closed once the call
 * finishes, even when nothing was written.
 *
 * @param s string to write; null and empty values are skipped
 * @return this instance, to allow chaining. Note that if the instance was not constructed
 *     with keepOpen, it is already closed on return and chaining further write methods
 *     will throw an exception.
 * @throws java.io.IOException if an I/O error occurs
 */
public SimpleTextWriter write(String s) throws IOException {
  try {
    boolean hasContent = s != null && !s.isEmpty();
    if (hasContent) {
      writer.write(s);
    }
  } finally {
    // The close also happens for skipped (null/empty) input.
    if (!keepOpen) {
      close();
    }
  }
  return this;
}
/**
 * Generates two text corpora from an annotated training file.
 *
 * <p><b>Root corpus:</b> one line per sentence, containing the {@code root} of every word's
 * correct parse, separated by single spaces and wrapped in {@code <s>} and {@code </s>}.
 * A sentence without words still produces a line ({@code <s> </s>}).
 *
 * <p><b>IG corpus:</b> for each non-empty sentence a three-parse window is slid over the
 * words, starting with the sentence-start parse and the first word. For every following word
 * one line is written per IG of that word, in the form
 * {@code lastIg(first) lastIg(second) ig}. After the last word a closing line is written
 * that ends with the last IG of the sentence-end parse. The first word of a sentence
 * therefore only appears as context, never in the third position.
 *
 * <p>Both writers are closed even if parsing or writing fails part-way through.
 *
 * @param trainingFile UTF-8 training file, loaded through {@code DataSetLoader}
 * @param rootCorpus file the root corpus is written to
 * @param igCorpus file the IG corpus is written to
 * @throws IOException if reading the training file or writing either corpus fails
 */
public static void generateTrainingCorpus(File trainingFile, File rootCorpus, File igCorpus)
    throws IOException {
  DataSet trainingSet = Files.readLines(trainingFile, Charsets.UTF_8, new DataSetLoader());
  System.out.println("Amount of sentences in training set:" + trainingSet.sentences.size());
  System.out.println("Amount of tokens in training set:" + trainingSet.tokenCount());

  SimpleTextWriter rootWriter = SimpleTextWriter.keepOpenUTF8Writer(rootCorpus);
  try {
    System.out.println("Generating Lemma Corpus.");
    for (SentenceData sentenceData : trainingSet) {
      List<String> roots = Lists.newArrayList("<s>");
      for (Z3WordData word : sentenceData.words) {
        Z3WordParse parse = new Z3WordParse(word.correctParse);
        roots.add(parse.root);
      }
      roots.add("</s>");
      rootWriter.writeLine(Joiner.on(" ").join(roots));
    }
  } finally {
    // Release the file handle even when a parse or a write throws.
    rootWriter.close();
  }

  SimpleTextWriter igWriter = SimpleTextWriter.keepOpenUTF8Writer(igCorpus);
  try {
    System.out.println("Generating IG Corpus.");
    Z3WordParse start = new Z3WordParse(SENTENCE_START_PARSE);
    Z3WordParse end = new Z3WordParse(SENTENCE_END_PARSE);
    for (SentenceData sentenceData : trainingSet) {
      // Unlike the root corpus, sentences without words contribute nothing here.
      if (sentenceData.words.isEmpty()) {
        continue;
      }
      Z3WordParse first = start;
      Z3WordParse second = new Z3WordParse(sentenceData.words.get(0).correctParse);
      for (int i = 1; i < sentenceData.words.size(); i++) {
        Z3WordParse third = new Z3WordParse(sentenceData.words.get(i).correctParse);
        // One line per IG of the current word, conditioned on the last IGs of the
        // two preceding parses.
        for (int j = 0; j < third.igs.size(); j++) {
          igWriter.writeLine(
              first.getLastIg() + " " + second.getLastIg() + " " + third.igs.get(j));
        }
        // Slide the window one word to the right.
        first = second;
        second = third;
      }
      // Close the sentence with the sentence-end marker in the third position.
      igWriter.writeLine(
          first.getLastIg() + " " + second.getLastIg() + " " + end.getLastIg());
    }
  } finally {
    // Release the file handle even when a parse or a write throws.
    igWriter.close();
  }
}