예제 #1
0
  /**
   * Exploration pass over the grammar: registers every symbol with the vocabulary and hands
   * every feature value to the feature type analyzer.
   *
   * <p>A line is split into fields on a whitespace-delimited {@code |||}; the first four fields
   * are read as left-hand side, source side, target side and features. Lines with fewer than
   * four fields are logged and skipped.
   *
   * @param grammar reader supplying the grammar lines
   */
  private void explore(LineReader grammar) {
    int lineNumber = 0;
    boolean labelingUndecided = true;

    while (grammar.hasNext()) {
      String rule = grammar.next().trim();
      lineNumber += 1;

      String[] fields = rule.split("\\s\\|{3}\\s");
      if (fields.length < 4) {
        logger.warning("Incomplete grammar line at line " + lineNumber);
        continue;
      }

      String[] sourceSide = fields[1].split("\\s");
      String[] targetSide = fields[2].split("\\s");
      String[] featureTokens = fields[3].split("\\s");

      // Register symbols in a fixed order: left-hand side, source side, target side.
      // Nonterminals are stripped via FormatUtils.stripNt before being registered.
      Vocabulary.id(fields[0]);
      for (String[] side : new String[][] {sourceSide, targetSide}) {
        for (String symbol : side) {
          Vocabulary.id(FormatUtils.isNonterminal(symbol) ? FormatUtils.stripNt(symbol) : symbol);
        }
      }

      // Decide labeled vs. unlabeled once, on the first line that yields feature tokens.
      if (labelingUndecided && featureTokens.length != 0) {
        boolean firstFeatureHasLabel = featureTokens[0].contains("=");
        if (!firstFeatureHasLabel) {
          // One unlabeled feature is taken to mean the entire grammar is unlabeled.
          labeled = false;
        }
        this.types.setLabeled(labeled);
        labelingUndecided = false;
      }

      // Feed each feature value to the analyzer. Unlabeled features are keyed by their
      // position on the line, labeled ones by the vocabulary id of their name.
      for (int index = 0; index < featureTokens.length; index++) {
        String token = featureTokens[index];
        if (!labeled) {
          types.observe(index, Float.parseFloat(token));
          continue;
        }

        // NOTE(review): a labeled token without '=' (other than "Alignment") makes the
        // nameAndValue[1] access below throw ArrayIndexOutOfBoundsException.
        String[] nameAndValue = token.split("=");
        if (!nameAndValue[0].equals("Alignment")) {
          types.observe(Vocabulary.id(nameAndValue[0]), Float.parseFloat(nameAndValue[1]));
        }
      }
    }
  }
예제 #2
0
  /**
   * Runs the complete packing procedure: an exploration pass over the grammar, type inference,
   * writing of the encoding and vocabulary files, and a second pass that binarizes the grammar
   * (together with the alignments when alignment packing is enabled).
   *
   * @throws IOException declared by the signature; presumably propagated from the file
   *     operations performed during the two passes
   */
  public void pack() throws IOException {
    logger.info("Beginning exploration pass.");

    // First pass: learn vocabulary and feature value histograms.
    // NOTE(review): neither this reader nor the ones opened for the packing pass are closed
    // in this method -- confirm whether explore()/binarize() take care of that.
    logger.info("Exploring: " + grammar);
    explore(new LineReader(grammar));

    logger.info("Exploration pass complete. Freezing vocabulary and finalizing encoders.");
    if (dump != null) {
      // Optional dump of the analyzer's textual representation.
      PrintWriter featureDump = new PrintWriter(dump);
      featureDump.println(types.toString());
      featureDump.close();
    }

    types.inferTypes(this.labeled);
    logger.info("Type inference complete.");

    logger.info("Finalizing encoding.");

    final String encodingPath = output + File.separator + "encoding";
    final String vocabularyPath = output + File.separator + "vocabulary";

    logger.info("Writing encoding.");
    types.write(encodingPath);

    logger.info("Freezing vocab.");
    Vocabulary.freeze();

    logger.info("Writing vocab.");
    Vocabulary.write(vocabularyPath);

    // Reload the encoder configuration that was just written so that it matches up with the
    // changed vocabulary ids.
    logger.info("Reading encoding.");
    encoderConfig = new EncoderConfiguration();
    encoderConfig.load(encodingPath);

    // Second pass: slice and pack source, target and data.
    logger.info("Beginning packing pass.");
    Queue<PackingFileTuple> slices = new PriorityQueue<PackingFileTuple>();
    LineReader grammarPass = new LineReader(grammar);
    LineReader alignmentPass = packAlignments ? new LineReader(alignments) : null;
    binarize(grammarPass, alignmentPass, slices);
    logger.info("Packing complete.");

    logger.info("Packed grammar in: " + output);
    logger.info("Done.");
  }
예제 #3
0
  /**
   * Creates a packer for the given grammar and prepares the output directory.
   *
   * <p>Terminates the JVM with a non-zero exit status when an alignments file is named but does
   * not exist, or when the output directory cannot be created.
   *
   * @param grammar_filename grammar file to pack
   * @param config_filename configuration file, read both by this packer and by the feature type
   *     analyzer; when {@code null}, feature types are auto-detected
   * @param output_filename directory that receives the packed grammar; created (including
   *     missing parent directories) if it does not exist yet
   * @param alignments_filename alignments file; when {@code null}, alignments are not packed
   * @param featuredump_filename file name stored for the optional feature dump; may be
   *     {@code null}
   * @throws IOException declared by the signature; presumably raised while reading the
   *     configuration file
   */
  public GrammarPacker(
      String grammar_filename,
      String config_filename,
      String output_filename,
      String alignments_filename,
      String featuredump_filename)
      throws IOException {
    // The grammar is assumed to be labeled until the exploration pass finds otherwise.
    this.labeled = true;
    this.grammar = grammar_filename;
    this.output = output_filename;
    this.dump = featuredump_filename;

    // TODO: Always open encoder config? This is debatable.
    this.types = new FeatureTypeAnalyzer(true);

    this.alignments = alignments_filename;
    packAlignments = (alignments != null);
    if (!packAlignments) {
      logger.info("No alignments file specified, skipping.");
    } else if (!new File(alignments_filename).exists()) {
      logger.severe("Alignments file does not exist: " + alignments);
      // Fatal set-up error: exit with a non-zero status so calling scripts can detect it.
      System.exit(1);
    }

    if (config_filename != null) {
      readConfig(config_filename);
      types.readConfig(config_filename);
    } else {
      logger.info("No config specified. Attempting auto-detection of feature types.");
    }

    File working_dir = new File(output);
    // mkdirs() also creates missing parent directories. Its return value is ignored on purpose:
    // it is false when the directory already exists, which the check below accepts.
    working_dir.mkdirs();
    // isDirectory() rather than exists(): a regular file at this path is not a usable output
    // directory.
    if (!working_dir.isDirectory()) {
      logger.severe("Failed creating output directory.");
      System.exit(1);
    }
  }