Java FormatUtils Examples

Programming Language: Java

Namespace/Package Name: joshua.util

Class/Type: FormatUtils

Examples at hotexamples.com: 2

Java FormatUtils - 2 examples found. These are the top-rated real-world Java examples of joshua.util.FormatUtils, extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

stripNt(2)

isNonterminal(2)

getNonterminalIndex(1)

Example #1

Show file

File: GrammarPacker.java Project: se4u/joshua

  /**
   * Scans every line of the grammar once, registering the symbols and feature names it
   * encounters with {@code Vocabulary} and reporting each feature value to {@code types}.
   *
   * <p>A line is expected to hold at least four fields separated by {@code " ||| "}: left-hand
   * side, source side, target side and features. Lines with fewer fields are logged and skipped.
   * On the first complete line, the first feature entry decides whether the grammar is treated
   * as labeled ({@code name=value}) or unlabeled (bare values).
   *
   * @param grammar line-by-line reader over the grammar to inspect
   */
  private void explore(LineReader grammar) {
    boolean labelingUndetermined = true;

    // lineNumber is 1-based and advances for skipped lines as well.
    for (int lineNumber = 1; grammar.hasNext(); lineNumber++) {
      String[] fields = grammar.next().trim().split("\\s\\|{3}\\s");
      if (fields.length < 4) {
        logger.warning("Incomplete grammar line at line " + lineNumber);
        continue;
      }

      // Register the left-hand side, then every source and target symbol.
      // Nonterminals are passed through FormatUtils.stripNt before registration.
      Vocabulary.id(fields[0]);
      for (String token : fields[1].split("\\s")) {
        if (!FormatUtils.isNonterminal(token)) {
          Vocabulary.id(token);
        } else {
          Vocabulary.id(FormatUtils.stripNt(token));
        }
      }
      for (String token : fields[2].split("\\s")) {
        if (!FormatUtils.isNonterminal(token)) {
          Vocabulary.id(token);
        } else {
          Vocabulary.id(FormatUtils.stripNt(token));
        }
      }

      String[] features = fields[3].split("\\s");

      // Decide once whether features carry labels: a single unlabeled feature
      // is taken to mean that the whole grammar is unlabeled.
      if (labelingUndetermined && features.length != 0) {
        labelingUndetermined = false;
        if (!features[0].contains("=")) {
          labeled = false;
        }
        this.types.setLabeled(labeled);
      }

      // Report every feature value; labeled features are keyed by the vocabulary id
      // of their name, unlabeled ones by their position in the line.
      for (int position = 0; position < features.length; ++position) {
        if (!labeled) {
          types.observe(position, Float.parseFloat(features[position]));
          continue;
        }
        String[] nameAndValue = features[position].split("=");
        if ("Alignment".equals(nameAndValue[0])) {
          // Entries named "Alignment" are not reported.
          continue;
        }
        int featureId = Vocabulary.id(nameAndValue[0]);
        float featureValue = Float.parseFloat(nameAndValue[1]);
        types.observe(featureId, featureValue);
      }
    }
  }

Example #2

Show file

File: GrammarPacker.java Project: se4u/joshua

  /**
   * Reads the grammar line by line, accumulates rules into source/target tries plus feature
   * (and optionally alignment) buffers, and flushes the accumulated data into {@code slices}.
   *
   * <p>A slice is closed once the line count exceeds {@code SLICE_SIZE} or one of the buffers
   * reports overflow, but the actual flush is deferred until the first source word changes, so
   * consecutive rules sharing the same first source word stay in the same slice. Whatever
   * remains after the last line is flushed as a final slice.
   *
   * <p>Grammar lines with fewer than four {@code " ||| "}-separated fields are logged and
   * skipped.
   *
   * @param grammar_reader line reader over the grammar rules
   * @param alignment_reader line reader over the alignments; only consulted when
   *     {@code packAlignments} is set
   * @param slices queue that receives one entry per flushed slice
   * @throws IOException declared by this method; presumably propagated from {@code flush},
   *     whose body is not visible here
   * @throws RuntimeException if alignments are packed and the alignment reader runs out of
   *     lines, or if the feature and alignment block indices disagree
   */
  private void binarize(
      LineReader grammar_reader, LineReader alignment_reader, Queue<PackingFileTuple> slices)
      throws IOException {
    int counter = 0;
    int slice_counter = 0;
    int num_slices = 0;

    boolean ready_to_flush = false;
    String first_source_word = null;

    PackingTrie<SourceValue> source_trie = new PackingTrie<SourceValue>();
    PackingTrie<TargetValue> target_trie = new PackingTrie<TargetValue>();
    FeatureBuffer feature_buffer = new FeatureBuffer();

    AlignmentBuffer alignment_buffer = null;
    if (packAlignments) alignment_buffer = new AlignmentBuffer();

    TreeMap<Integer, Float> features = new TreeMap<Integer, Float>();
    while (grammar_reader.hasNext()) {
      String grammar_line = grammar_reader.next().trim();
      counter++;
      slice_counter++;

      String[] fields = grammar_line.split("\\s\\|{3}\\s");
      if (fields.length < 4) {
        // NOTE(review): the alignment reader is not advanced for a skipped grammar line.
        // If the two inputs are line-parallel this would desynchronize them - confirm.
        logger.warning("Incomplete grammar line at line " + counter);
        continue;
      }
      String lhs_word = fields[0];
      String[] source_words = fields[1].split("\\s");
      String[] target_words = fields[2].split("\\s");
      String[] feature_entries = fields[3].split("\\s");

      // Reached slice limit size, indicate that we're closing up.
      if (!ready_to_flush
          && (slice_counter > SLICE_SIZE
              || feature_buffer.overflowing()
              || (packAlignments && alignment_buffer.overflowing()))) {
        ready_to_flush = true;
        // Remember the current first source word; the flush waits until it changes.
        first_source_word = source_words[0];
      }
      // Finished closing up.
      if (ready_to_flush && !first_source_word.equals(source_words[0])) {
        slices.add(flush(source_trie, target_trie, feature_buffer, alignment_buffer, num_slices));
        source_trie.clear();
        target_trie.clear();
        feature_buffer.clear();
        if (packAlignments) alignment_buffer.clear();

        num_slices++;
        slice_counter = 0;
        ready_to_flush = false;
      }

      int alignment_index = -1;
      // If present, process alignments.
      if (packAlignments) {
        if (!alignment_reader.hasNext()) {
          logger.severe("No more alignments starting in line " + counter);
          throw new RuntimeException("No more alignments starting in line " + counter);
        }
        String alignment_line = alignment_reader.next().trim();
        // String.split yields a one-element array holding "" for an empty line, so the
        // empty case must be handled explicitly; otherwise Byte.parseByte("") below would
        // throw a NumberFormatException instead of recording an empty alignment.
        String[] alignment_entries =
            alignment_line.isEmpty() ? new String[0] : alignment_line.split("\\s");
        // Each "a-b" entry is stored as two consecutive bytes.
        byte[] alignments = new byte[alignment_entries.length * 2];
        for (int i = 0; i < alignment_entries.length; i++) {
          String[] parts = alignment_entries[i].split("-");
          alignments[2 * i] = Byte.parseByte(parts[0]);
          alignments[2 * i + 1] = Byte.parseByte(parts[1]);
        }
        alignment_index = alignment_buffer.add(alignments);
      }

      // Process features.
      // Implicitly sort via TreeMap, write to data buffer, remember position
      // to pass on to the source trie node.
      features.clear();
      for (int f = 0; f < feature_entries.length; ++f) {
        String feature_entry = feature_entries[f];
        if (this.labeled) {
          String[] parts = feature_entry.split("=");
          // Entries named "Alignment" are not stored as features.
          if (parts[0].equals("Alignment")) continue;
          int feature_id = Vocabulary.id(parts[0]);
          float feature_value = Float.parseFloat(parts[1]);
          // Zero-valued features are not stored.
          if (feature_value != 0) features.put(encoderConfig.innerId(feature_id), feature_value);
        } else {
          // Unlabeled features are keyed by their position in the line.
          float feature_value = Float.parseFloat(feature_entry);
          if (feature_value != 0) features.put(f, feature_value);
        }
      }
      int features_index = feature_buffer.add(features);

      // Sanity check on the data block index.
      if (packAlignments && features_index != alignment_index) {
        logger.severe(
            "Block index mismatch between features ("
                + features_index
                + ") and alignments ("
                + alignment_index
                + ").");
        throw new RuntimeException("Data block index mismatch.");
      }

      // Process source side.
      SourceValue sv = new SourceValue(Vocabulary.id(lhs_word), features_index);
      int[] source = new int[source_words.length];
      for (int i = 0; i < source_words.length; i++) {
        if (FormatUtils.isNonterminal(source_words[i]))
          source[i] = Vocabulary.id(FormatUtils.stripNt(source_words[i]));
        else source[i] = Vocabulary.id(source_words[i]);
      }
      source_trie.add(source, sv);

      // Process target side.
      // The target sequence is stored in reverse order; nonterminals are written as the
      // negated value of FormatUtils.getNonterminalIndex, terminals as vocabulary ids.
      TargetValue tv = new TargetValue(sv);
      int[] target = new int[target_words.length];
      for (int i = 0; i < target_words.length; i++) {
        if (FormatUtils.isNonterminal(target_words[i])) {
          target[target_words.length - (i + 1)] = -FormatUtils.getNonterminalIndex(target_words[i]);
        } else {
          target[target_words.length - (i + 1)] = Vocabulary.id(target_words[i]);
        }
      }
      target_trie.add(target, tv);
    }
    // Flush whatever has accumulated since the last slice boundary.
    slices.add(flush(source_trie, target_trie, feature_buffer, alignment_buffer, num_slices));
  }