예제 #1
0
  private void explore(LineReader grammar) {
    int counter = 0;
    boolean first_line = true;
    while (grammar.hasNext()) {
      String line = grammar.next().trim();
      counter++;

      String[] fields = line.split("\\s\\|{3}\\s");
      if (fields.length < 4) {
        logger.warning("Incomplete grammar line at line " + counter);
        continue;
      }

      String lhs = fields[0];
      String[] source = fields[1].split("\\s");
      String[] target = fields[2].split("\\s");
      String[] features = fields[3].split("\\s");

      Vocabulary.id(lhs);
      // Add symbols to vocabulary.
      for (String source_word : source) {
        if (FormatUtils.isNonterminal(source_word)) Vocabulary.id(FormatUtils.stripNt(source_word));
        else Vocabulary.id(source_word);
      }
      for (String target_word : target) {
        if (FormatUtils.isNonterminal(target_word)) Vocabulary.id(FormatUtils.stripNt(target_word));
        else Vocabulary.id(target_word);
      }

      // Test features for labeling.
      if (first_line && features.length != 0) {
        if (!features[0].contains("=")) {
          // We assume that if there is one unlabeled feature the entire grammar is unlabeled.
          labeled = false;
        }
        this.types.setLabeled(labeled);
        first_line = false;
      }

      // Add feature names to vocabulary and pass the value through the
      // appropriate encoder.
      for (int f = 0; f < features.length; ++f) {
        if (labeled) {
          String[] fe = features[f].split("=");
          if (fe[0].equals("Alignment")) continue;
          types.observe(Vocabulary.id(fe[0]), Float.parseFloat(fe[1]));
        } else {
          types.observe(f, Float.parseFloat(features[f]));
        }
      }
    }
  }