예제 #1
0
 /**
  * Writes each String of the given collection to the underlying writer by delegating to
  * {@code IOs.writeLines}.
  *
  * <p>Unless this instance is in keep-open mode, {@code close()} is called before the method
  * returns, whether or not the write succeeded.
  *
  * @param lines : lines to write, null entries produce blank lines
  * @return the current instance. Keep in mind that if the instance was not constructed as
  *     keep-open, the writer is already closed at this point and chaining other write methods
  *     will throw an exception.
  * @throws java.io.IOException if an I/O error occurs
  */
 public SimpleTextWriter writeLines(Collection<String> lines) throws IOException {
   try {
     IOs.writeLines(lines, writer);
   } finally {
     // One-shot writers release the underlying writer after every call.
     if (!keepOpen) {
       close();
     }
   }
   return this;
 }
예제 #2
0
 /**
  * Writes the <code>toString()</code> value of each item in the given collection to the
  * underlying writer by delegating to {@code IOs.writeToStringLines}.
  *
  * <p>Unless this instance is in keep-open mode, {@code close()} is called before the method
  * returns, whether or not the write succeeded.
  *
  * @param objects : items to write, null entries produce blank lines
  * @return the current instance. Keep in mind that if the instance was not constructed as
  *     keep-open, the writer is already closed at this point and chaining other write methods
  *     will throw an exception.
  * @throws java.io.IOException if an I/O error occurs
  */
 public SimpleTextWriter writeToStringLines(Collection<?> objects) throws IOException {
   try {
     IOs.writeToStringLines(objects, writer);
   } finally {
     // One-shot writers release the underlying writer after every call.
     if (!keepOpen) {
       close();
     }
   }
   return this;
 }
예제 #3
0
 /**
  * Writes a String to the underlying writer. A null or empty String writes nothing.
  *
  * <p>Unless this instance is in keep-open mode, {@code close()} is called before the method
  * returns, even when nothing was written or the write failed.
  *
  * @param s : string to write.
  * @return the current instance. Keep in mind that if the instance was not constructed as
  *     keep-open, the writer is already closed at this point and chaining other write methods
  *     will throw an exception.
  * @throws java.io.IOException if an I/O error occurs
  */
 public SimpleTextWriter write(String s) throws IOException {
   try {
     boolean hasContent = s != null && s.length() > 0;
     if (hasContent) {
       writer.write(s);
     }
   } finally {
     // One-shot writers release the underlying writer after every call.
     if (!keepOpen) {
       close();
     }
   }
   return this;
 }
예제 #4
0
 /**
  * Generates a root (lemma) corpus and an IG corpus from a training file.
  *
  * <p>The training file is read as UTF-8 lines through a {@code DataSetLoader}. Two files are
  * then written:
  *
  * <ul>
  *   <li>{@code rootCorpus}: one line per sentence, made of {@code <s>}, the root of every
  *       word's correct parse and {@code </s>}, separated by single spaces.
  *   <li>{@code igCorpus}: for every non-empty sentence, one line per IG of each word from the
  *       second word onwards, formed as "last IG of the parse two back, last IG of the previous
  *       parse, current IG"; then one closing line that ends with the last IG of the
  *       sentence-end parse. The sentence-start parse acts as the parse preceding the first
  *       word.
  * </ul>
  *
  * <p>Both writers are closed in {@code finally} blocks so that the output files are released
  * even when parsing or writing fails part-way.
  *
  * @param trainingFile file holding the training sentences
  * @param rootCorpus output file for the root corpus
  * @param igCorpus output file for the IG corpus
  * @throws IOException if reading the training file or writing either corpus fails
  */
 public static void generateTrainingCorpus(File trainingFile, File rootCorpus, File igCorpus)
     throws IOException {
   DataSet trainingSet = Files.readLines(trainingFile, Charsets.UTF_8, new DataSetLoader());
   System.out.println("Amount of sentences in training set:" + trainingSet.sentences.size());
   System.out.println("Amount of tokens in training set:" + trainingSet.tokenCount());

   SimpleTextWriter rootWriter = SimpleTextWriter.keepOpenUTF8Writer(rootCorpus);
   try {
     System.out.println("Generating Lemma Corpus.");
     for (SentenceData sentenceData : trainingSet) {
       List<String> roots = Lists.newArrayList("<s>");
       for (Z3WordData word : sentenceData.words) {
         Z3WordParse parse = new Z3WordParse(word.correctParse);
         roots.add(parse.root);
       }
       roots.add("</s>");
       rootWriter.writeLine(Joiner.on(" ").join(roots));
     }
   } finally {
     // Close on failure as well; previously an exception here leaked the open file.
     rootWriter.close();
   }

   SimpleTextWriter igWriter = SimpleTextWriter.keepOpenUTF8Writer(igCorpus);
   try {
     System.out.println("Generating IG Corpus.");
     Z3WordParse start = new Z3WordParse(SENTENCE_START_PARSE);
     Z3WordParse end = new Z3WordParse(SENTENCE_END_PARSE);
     for (SentenceData sentenceData : trainingSet) {
       if (sentenceData.words.size() == 0) continue;
       // Sliding window of three parses: (first, second) are context, third is the current word.
       Z3WordParse first = start;
       Z3WordParse second = new Z3WordParse(sentenceData.words.get(0).correctParse);
       // NOTE(review): the loop starts at the second word, so the IGs of the first word never
       // appear in the third position of a line - confirm this is intended.
       for (int i = 1; i < sentenceData.words.size(); i++) {
         Z3WordParse third = new Z3WordParse(sentenceData.words.get(i).correctParse);
         String context = first.getLastIg() + " " + second.getLastIg() + " ";
         for (int j = 0; j < third.igs.size(); j++) {
           igWriter.writeLine(context + third.igs.get(j));
         }
         first = second;
         second = third;
       }
       // Closing line: the last two parses followed by the sentence-end marker.
       igWriter.writeLine(first.getLastIg() + " " + second.getLastIg() + " " + end.getLastIg());
     }
   } finally {
     // Close on failure as well; previously an exception here leaked the open file.
     igWriter.close();
   }
 }