Esempio n. 1
0
  /**
   * Reads token-per-line input from {@code in}, parses each sentence with {@code pipeline} and
   * writes the result to {@code writer}.
   *
   * <p>A line that is empty after trimming ends the current sentence. Every other line is split
   * with {@code WHITESPACE_PATTERN}; the field at index 1 is used as the word form and, when
   * {@code options.skipPI} is set, the field at index 12 marks the token as a predicate if it
   * equals {@code "Y"}. A line with too few fields results in an
   * {@link ArrayIndexOutOfBoundsException}.
   *
   * <p>Blank lines that do not terminate any buffered tokens (leading or repeated blank lines) are
   * ignored, and a final sentence that is not followed by a blank line is parsed with the same
   * options as all other sentences.
   *
   * @param options command line options; {@code desegment} and {@code skipPI} select the parse mode
   * @param pipeline the pipeline used to parse each sentence
   * @param in the reader supplying the input lines
   * @param writer the sink receiving every parsed sentence
   * @return the number of sentences parsed and written
   * @throws IOException if reading from {@code in} fails
   * @throws Exception if the pipeline or the writer fails
   */
  private static int parseCoNLL09(
      CompletePipelineCMDLineOptions options,
      CompletePipeline pipeline,
      BufferedReader in,
      SentenceWriter writer)
      throws IOException, Exception {
    List<String> forms = new ArrayList<String>();
    forms.add("<root>");
    List<Boolean> isPred = new ArrayList<Boolean>();
    isPred.add(false); // Root is not a predicate
    String str;
    int senCount = 0;

    while ((str = in.readLine()) != null) {
      if (str.trim().equals("")) {
        if (forms.size() <= 1) {
          // Only the root token is buffered: nothing to parse for this blank line.
          continue;
        }
        Sentence s = parseCoNLL09Sentence(options, pipeline, forms, isPred);
        forms.clear();
        forms.add("<root>");
        isPred.clear();
        isPred.add(false); // Root is not a predicate
        writer.write(s);
        senCount++;
        // TODO fix output in general, don't print to System.out. Wrap a printstream in some
        // (static) class, and allow people to adjust this. While doing this, also add the option
        // to make the output file be -, ie so it prints to stdout. All kinds of errors should go
        // to stderr, and nothing should be printed to stdout by default.
        if (senCount % 100 == 0) {
          System.out.println("Processing sentence " + senCount);
        }
      } else {
        String[] tokens = WHITESPACE_PATTERN.split(str);
        forms.add(tokens[1]);
        if (options.skipPI) {
          isPred.add(tokens[12].equals("Y"));
        }
      }
    }

    // The input may end without a trailing blank line; flush the pending sentence using the same
    // parse mode as the sentences handled inside the loop. The root token is always present,
    // hence the comparison against 1.
    if (forms.size() > 1) {
      writer.write(parseCoNLL09Sentence(options, pipeline, forms, isPred));
      senCount++;
    }
    return senCount;
  }

  /** Parses one buffered sentence using the mode selected by {@code options}. */
  private static Sentence parseCoNLL09Sentence(
      CompletePipelineCMDLineOptions options,
      CompletePipeline pipeline,
      List<String> forms,
      List<Boolean> isPred)
      throws Exception {
    if (options.desegment) {
      return pipeline.parse(ChineseDesegmenter.desegment(forms.toArray(new String[0])));
    }
    return options.skipPI ? pipeline.parseOraclePI(forms, isPred) : pipeline.parse(forms);
  }
Esempio n. 2
0
  /**
   * Treats every line read from {@code in} as one input to {@code pipeline.parse}, writing each
   * result to {@code writer} in input order.
   *
   * <p>A progress message is printed to standard output after every 100th sentence.
   *
   * @param options command line options (not consulted by this method)
   * @param pipeline the pipeline used to parse each line
   * @param in the reader supplying one input per line
   * @param writer the sink receiving every parsed sentence
   * @return the number of lines parsed and written
   * @throws IOException if reading from {@code in} fails
   * @throws Exception if the pipeline or the writer fails
   */
  private static int parseNonSegmentedLineByLine(
      CompletePipelineCMDLineOptions options,
      CompletePipeline pipeline,
      BufferedReader in,
      SentenceWriter writer)
      throws IOException, Exception {
    int processed = 0;

    for (String line = in.readLine(); line != null; line = in.readLine()) {
      Sentence parsed = pipeline.parse(line);
      writer.write(parsed);
      processed++;

      // TODO: progress output should not go straight to System.out.
      boolean atReportingInterval = processed % 100 == 0;
      if (atReportingInterval) {
        System.out.println("Processing sentence " + processed);
      }
    }

    return processed;
  }