Java FeatureBuffer Examples

Programming Language: Java

Class/Type: FeatureBuffer

Examples at hotexamples.com: 2

Java FeatureBuffer - 2 examples found. These are the top-rated real-world Java examples of FeatureBuffer, extracted from open-source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

add(1)

clear(1)

flush(1)

initialize(1)

overflowing(1)

write(1)

Example #1

Show file

File: GrammarPacker.java Project: se4u/joshua

  /**
   * Serializes the source, target and feature data structures (and, when alignments are being
   * packed, the alignment data) into the interlinked binary files of one grammar slice.
   *
   * <p>The target trie is written first, as a skeletal (nodes don't carry any data)
   * upward-pointing trie: nodes are visited level by level and each node is written as the address
   * of its parent (-1 for the root) followed by its symbol. While doing so, the node's address is
   * recorded on the parent of each of its values, and the stream position reached at the end of
   * each trie level is collected and written to the target lookup stream.
   *
   * <p>The source trie is then written as a downward-pointing trie: for each node, the number of
   * children, a (key, address) pair per child, the number of values, and for each value its lhs,
   * its target link and its feature block index. Feature (and alignment) data are handed to their
   * buffers as the source values are visited, and the buffers are flushed to their streams at the
   * end.
   *
   * <p>If anything fails part-way through, every stream that was already opened is closed before
   * the exception propagates.
   *
   * @param source_trie root of the source-side trie to write
   * @param target_trie root of the target-side trie to write
   * @param feature_buffer buffer the feature data of each source value is written through
   * @param alignment_buffer buffer the alignment data is written through; only used when alignments
   *     are being packed
   * @param id number of this slice, used (zero-padded to five digits) in the slice name
   * @return the slice that was written
   * @throws IOException if writing to or closing one of the slice's streams fails
   */
  private PackingFileTuple flush(
      PackingTrie<SourceValue> source_trie,
      PackingTrie<TargetValue> target_trie,
      FeatureBuffer feature_buffer,
      AlignmentBuffer alignment_buffer,
      int id)
      throws IOException {
    // Make a slice object for this piece of the grammar.
    PackingFileTuple slice = new PackingFileTuple("slice_" + String.format("%05d", id));

    // Declared outside the try block so that whatever has been opened can be closed on failure.
    DataOutputStream source_stream = null;
    DataOutputStream target_stream = null;
    DataOutputStream target_lookup_stream = null;
    DataOutputStream feature_stream = null;
    DataOutputStream alignment_stream = null;

    boolean completed = false;
    try {
      // Pull out the streams for source, target and data output.
      source_stream = slice.getSourceOutput();
      target_stream = slice.getTargetOutput();
      target_lookup_stream = slice.getTargetLookupOutput();
      feature_stream = slice.getFeatureOutput();
      // NOTE(review): fetched even when alignments are not packed, but only closed below when
      // they are; confirm getAlignmentOutput() does not open a file in that case.
      alignment_stream = slice.getAlignmentOutput();

      // The number of bytes written into the target stream.
      int target_position = 0;

      // Start the level-by-level traversal at the target trie root.
      Queue<PackingTrie<TargetValue>> target_queue = new LinkedList<PackingTrie<TargetValue>>();
      target_queue.add(target_trie);

      // Target lookup table for trie levels: one end position per level.
      int current_level_size = 1;
      int next_level_size = 0;
      ArrayList<Integer> target_lookup = new ArrayList<Integer>();

      // Packing loop for upwards-pointing target trie.
      while (!target_queue.isEmpty()) {
        // Pop top of queue.
        PackingTrie<TargetValue> node = target_queue.poll();
        // Register that this is where we're writing the node to.
        node.address = target_position;
        // Tell source nodes that we're writing to this position in the file.
        for (TargetValue tv : node.values) {
          tv.parent.target = node.address;
        }
        // Write link to parent; the root has none and is marked with -1.
        if (node.parent != null) {
          target_stream.writeInt(node.parent.address);
        } else {
          target_stream.writeInt(-1);
        }
        target_stream.writeInt(node.symbol);
        // Enqueue children.
        for (int k : node.children.descendingKeySet()) {
          target_queue.add(node.children.get(k));
        }
        target_position += node.size(false, true);
        next_level_size += node.children.size();

        // Once the last node of the current level has been written, remember where the level
        // ended and move on to counting down the next one.
        current_level_size--;
        if (current_level_size == 0) {
          target_lookup.add(target_position);
          current_level_size = next_level_size;
          next_level_size = 0;
        }
      }
      target_lookup_stream.writeInt(target_lookup.size());
      for (int level_end : target_lookup) {
        target_lookup_stream.writeInt(level_end);
      }
      target_lookup_stream.close();

      // Setting up for source and data writing.
      Queue<PackingTrie<SourceValue>> source_queue = new LinkedList<PackingTrie<SourceValue>>();
      source_queue.add(source_trie);
      // The number of bytes both written into the source stream and
      // buffered in the source queue.
      int source_position = source_trie.size(true, false);
      // NOTE(review): the source root's address is set to the final target position and is not
      // read again in this method; confirm this is intended.
      source_trie.address = target_position;

      // Ready data buffers for writing.
      feature_buffer.initialize();
      if (packAlignments) {
        alignment_buffer.initialize();
      }

      // Packing loop for downwards-pointing source trie.
      while (!source_queue.isEmpty()) {
        // Pop top of queue.
        PackingTrie<SourceValue> node = source_queue.poll();
        // Write number of children.
        source_stream.writeInt(node.children.size());
        // Write links to children.
        for (int k : node.children.descendingKeySet()) {
          PackingTrie<SourceValue> child = node.children.get(k);
          // Enqueue child.
          source_queue.add(child);
          // Child's address will be at the current end of the queue.
          child.address = source_position;
          // Advance cumulated size by child's size.
          source_position += child.size(true, false);
          // Write the link.
          source_stream.writeInt(k);
          source_stream.writeInt(child.address);
        }
        // Write number of data items.
        source_stream.writeInt(node.values.size());
        // Write lhs and links to target and data.
        for (SourceValue sv : node.values) {
          int feature_block_index = feature_buffer.write(sv.data);
          if (packAlignments) {
            // Both buffers must report the same block index for the same item, because only the
            // feature block index is written to the source stream.
            int alignment_block_index = alignment_buffer.write(sv.data);
            if (alignment_block_index != feature_block_index) {
              logger.severe("Block index mismatch.");
              throw new RuntimeException(
                  "Block index mismatch: alignment ("
                      + alignment_block_index
                      + ") and features ("
                      + feature_block_index
                      + ") don't match.");
            }
          }
          source_stream.writeInt(sv.lhs);
          source_stream.writeInt(sv.target);
          source_stream.writeInt(feature_block_index);
        }
      }
      // Flush the data stream.
      feature_buffer.flush(feature_stream);
      if (packAlignments) {
        alignment_buffer.flush(alignment_stream);
      }

      target_stream.close();
      source_stream.close();
      feature_stream.close();
      if (packAlignments) {
        alignment_stream.close();
      }

      completed = true;
      return slice;
    } finally {
      if (!completed) {
        // Something went wrong above: release every stream that was opened so far without
        // letting a secondary close failure hide the original exception.
        closeAfterFailure(target_lookup_stream);
        closeAfterFailure(target_stream);
        closeAfterFailure(source_stream);
        closeAfterFailure(feature_stream);
        closeAfterFailure(alignment_stream);
      }
    }
  }

  /** Closes {@code stream} if it is non-null, discarding any exception raised by the close. */
  private void closeAfterFailure(DataOutputStream stream) {
    if (stream == null) {
      return;
    }
    try {
      stream.close();
    } catch (IOException ignored) {
      // Deliberately ignored: the failure that brought us here is the one the caller must see.
    }
  }

Example #2

Show file

File: GrammarPacker.java Project: se4u/joshua

  /**
   * Reads a grammar line by line, accumulates the rules into source and target tries plus feature
   * (and optionally alignment) buffers, and flushes them out as slices.
   *
   * <p>Each grammar line is split into fields on a three-pipe separator surrounded by whitespace:
   * lhs, source words, target words and features. Lines with fewer than four fields are skipped
   * with a warning.
   *
   * <p>A slice is closed once the slice size limit is exceeded or one of the data buffers reports
   * it is overflowing, but the actual flush is delayed until a line whose first source word differs
   * from the one seen when the limit was hit. Whatever is left after the last line is flushed as a
   * final slice.
   *
   * @param grammar_reader reader over the grammar lines
   * @param alignment_reader reader over the alignment lines, one per processed grammar line; only
   *     read when alignments are being packed
   * @param slices queue that every flushed slice is appended to
   * @throws IOException if writing a slice fails
   */
  private void binarize(
      LineReader grammar_reader, LineReader alignment_reader, Queue<PackingFileTuple> slices)
      throws IOException {
    int counter = 0;
    int slice_counter = 0;
    int num_slices = 0;

    boolean ready_to_flush = false;
    String first_source_word = null;

    PackingTrie<SourceValue> source_trie = new PackingTrie<SourceValue>();
    PackingTrie<TargetValue> target_trie = new PackingTrie<TargetValue>();
    FeatureBuffer feature_buffer = new FeatureBuffer();

    AlignmentBuffer alignment_buffer = null;
    if (packAlignments) {
      alignment_buffer = new AlignmentBuffer();
    }

    // Reused for every line; a TreeMap so that features come out sorted by id.
    TreeMap<Integer, Float> features = new TreeMap<Integer, Float>();
    while (grammar_reader.hasNext()) {
      String grammar_line = grammar_reader.next().trim();
      counter++;
      slice_counter++;

      String[] fields = grammar_line.split("\\s\\|{3}\\s");
      if (fields.length < 4) {
        // NOTE(review): when alignments are packed, the alignment reader is not advanced for a
        // skipped grammar line; if the two inputs are line-parallel this shifts every following
        // alignment by one. Confirm against the expected input format.
        logger.warning("Incomplete grammar line at line " + counter);
        continue;
      }
      String lhs_word = fields[0];
      String[] source_words = fields[1].split("\\s");
      String[] target_words = fields[2].split("\\s");
      String[] feature_entries = fields[3].split("\\s");

      // Reached slice limit size, indicate that we're closing up.
      if (!ready_to_flush
          && (slice_counter > SLICE_SIZE
              || feature_buffer.overflowing()
              || (packAlignments && alignment_buffer.overflowing()))) {
        ready_to_flush = true;
        first_source_word = source_words[0];
      }
      // Finished closing up: the first source word changed, so the slice can be cut here.
      if (ready_to_flush && !first_source_word.equals(source_words[0])) {
        slices.add(flush(source_trie, target_trie, feature_buffer, alignment_buffer, num_slices));
        source_trie.clear();
        target_trie.clear();
        feature_buffer.clear();
        if (packAlignments) {
          alignment_buffer.clear();
        }

        num_slices++;
        slice_counter = 0;
        ready_to_flush = false;
      }

      int alignment_index = -1;
      // If present, process alignments.
      if (packAlignments) {
        if (!alignment_reader.hasNext()) {
          logger.severe("No more alignments starting in line " + counter);
          throw new RuntimeException("No more alignments starting in line " + counter);
        } else {
          String alignment_line = alignment_reader.next().trim();
          // Splitting an empty string yields one empty entry rather than an empty array, so an
          // empty alignment line has to be recognised before splitting. Runs of whitespace are
          // treated as a single separator for the same reason.
          String[] alignment_entries =
              alignment_line.isEmpty() ? new String[0] : alignment_line.split("\\s+");
          // Each entry has the form "a-b" and is stored as two consecutive bytes.
          byte[] alignments = new byte[alignment_entries.length * 2];
          for (int i = 0; i < alignment_entries.length; i++) {
            String[] parts = alignment_entries[i].split("-");
            alignments[2 * i] = Byte.parseByte(parts[0]);
            alignments[2 * i + 1] = Byte.parseByte(parts[1]);
          }
          alignment_index = alignment_buffer.add(alignments);
        }
      }

      // Process features.
      // Implicitly sort via TreeMap, write to data buffer, remember position
      // to pass on to the source trie node.
      features.clear();
      for (int f = 0; f < feature_entries.length; ++f) {
        String feature_entry = feature_entries[f];
        if (this.labeled) {
          // Labeled features have the form "name=value"; the "Alignment" entry is skipped here.
          String[] parts = feature_entry.split("=");
          if (parts[0].equals("Alignment")) {
            continue;
          }
          int feature_id = Vocabulary.id(parts[0]);
          float feature_value = Float.parseFloat(parts[1]);
          // Zero-valued features are not stored.
          if (feature_value != 0) {
            features.put(encoderConfig.innerId(feature_id), feature_value);
          }
        } else {
          // Unlabeled features are keyed by their position in the feature field.
          float feature_value = Float.parseFloat(feature_entry);
          if (feature_value != 0) {
            features.put(f, feature_value);
          }
        }
      }
      int features_index = feature_buffer.add(features);

      // Sanity check on the data block index.
      if (packAlignments && features_index != alignment_index) {
        logger.severe(
            "Block index mismatch between features ("
                + features_index
                + ") and alignments ("
                + alignment_index
                + ").");
        throw new RuntimeException("Data block index mismatch.");
      }

      // Process source side.
      SourceValue sv = new SourceValue(Vocabulary.id(lhs_word), features_index);
      int[] source = new int[source_words.length];
      for (int i = 0; i < source_words.length; i++) {
        if (FormatUtils.isNonterminal(source_words[i])) {
          source[i] = Vocabulary.id(FormatUtils.stripNt(source_words[i]));
        } else {
          source[i] = Vocabulary.id(source_words[i]);
        }
      }
      source_trie.add(source, sv);

      // Process target side. The target words are stored in reverse order, with nonterminals
      // encoded as their negated index.
      TargetValue tv = new TargetValue(sv);
      int[] target = new int[target_words.length];
      for (int i = 0; i < target_words.length; i++) {
        if (FormatUtils.isNonterminal(target_words[i])) {
          target[target_words.length - (i + 1)] = -FormatUtils.getNonterminalIndex(target_words[i]);
        } else {
          target[target_words.length - (i + 1)] = Vocabulary.id(target_words[i]);
        }
      }
      target_trie.add(target, tv);
    }
    // Write out whatever has accumulated since the last flush.
    slices.add(flush(source_trie, target_trie, feature_buffer, alignment_buffer, num_slices));
  }