private void explore(LineReader grammar) { int counter = 0; boolean first_line = true; while (grammar.hasNext()) { String line = grammar.next().trim(); counter++; String[] fields = line.split("\\s\\|{3}\\s"); if (fields.length < 4) { logger.warning("Incomplete grammar line at line " + counter); continue; } String lhs = fields[0]; String[] source = fields[1].split("\\s"); String[] target = fields[2].split("\\s"); String[] features = fields[3].split("\\s"); Vocabulary.id(lhs); // Add symbols to vocabulary. for (String source_word : source) { if (FormatUtils.isNonterminal(source_word)) Vocabulary.id(FormatUtils.stripNt(source_word)); else Vocabulary.id(source_word); } for (String target_word : target) { if (FormatUtils.isNonterminal(target_word)) Vocabulary.id(FormatUtils.stripNt(target_word)); else Vocabulary.id(target_word); } // Test features for labeling. if (first_line && features.length != 0) { if (!features[0].contains("=")) { // We assume that if there is one unlabeled feature the entire grammar is unlabeled. labeled = false; } this.types.setLabeled(labeled); first_line = false; } // Add feature names to vocabulary and pass the value through the // appropriate encoder. for (int f = 0; f < features.length; ++f) { if (labeled) { String[] fe = features[f].split("="); if (fe[0].equals("Alignment")) continue; types.observe(Vocabulary.id(fe[0]), Float.parseFloat(fe[1])); } else { types.observe(f, Float.parseFloat(features[f])); } } } }