Beispiel #1
0
  /**
   * Looks up the output for this input, or null if the input is not accepted. FST must be
   * INPUT_TYPE.BYTE4.
   */
  public static <T> T get(FST<T> fst, IntsRef input) throws IOException {
    assert fst.inputType == FST.INPUT_TYPE.BYTE4;

    // TODO: would be nice not to alloc this on every lookup
    final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());

    // Accumulate output as we go
    final T NO_OUTPUT = fst.outputs.getNoOutput();
    T output = NO_OUTPUT;
    for (int i = 0; i < input.length; i++) {
      if (fst.findTargetArc(input.ints[input.offset + i], arc, arc) == null) {
        return null;
      } else if (arc.output != NO_OUTPUT) {
        output = fst.outputs.add(output, arc.output);
      }
    }

    if (fst.findTargetArc(FST.END_LABEL, arc, arc) == null) {
      return null;
    } else if (arc.output != NO_OUTPUT) {
      return fst.outputs.add(output, arc.output);
    } else {
      return output;
    }
  }
Beispiel #2
0
  /**
   * Logically casts input to UTF32 ints then looks up the output or null if the input is not
   * accepted. FST must be INPUT_TYPE.BYTE4.
   */
  public static <T> T get(FST<T> fst, CharSequence input) throws IOException {
    assert fst.inputType == FST.INPUT_TYPE.BYTE4;

    // TODO: would be nice not to alloc this on every lookup
    final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());

    int charIdx = 0;
    final int charLimit = input.length();

    // Accumulate output as we go
    final T NO_OUTPUT = fst.outputs.getNoOutput();
    T output = NO_OUTPUT;

    while (charIdx < charLimit) {
      final int utf32 = Character.codePointAt(input, charIdx);
      charIdx += Character.charCount(utf32);

      if (fst.findTargetArc(utf32, arc, arc) == null) {
        return null;
      } else if (arc.output != NO_OUTPUT) {
        output = fst.outputs.add(output, arc.output);
      }
    }

    if (fst.findTargetArc(FST.END_LABEL, arc, arc) == null) {
      return null;
    } else if (arc.output != NO_OUTPUT) {
      return fst.outputs.add(output, arc.output);
    } else {
      return output;
    }
  }
Beispiel #3
0
  /**
   * Dumps an {@link FST} to a GraphViz's <code>dot</code> language description for visualization.
   * Example of use:
   *
   * <pre>
   * PrintStream ps = new PrintStream(&quot;out.dot&quot;);
   * fst.toDot(ps);
   * ps.close();
   * </pre>
   *
   * and then, from command line:
   *
   * <pre>
   * dot -Tpng -o out.png out.dot
   * </pre>
   *
   * <p>Note: larger FSTs (a few thousand nodes) won't even render, don't bother.
   *
   * @param sameRank If <code>true</code>, the resulting <code>dot</code> file will try to order
   *     states in layers of breadth-first traversal. This may mess up arcs, but makes the output
   *     FST's structure a bit clearer.
   * @param labelStates If <code>true</code> states will have labels equal to their offsets in their
   *     binary format. Expands the graph considerably.
   * @see "http://www.graphviz.org/"
   */
  public static <T> void toDot(FST<T> fst, Writer out, boolean sameRank, boolean labelStates)
      throws IOException {
    final String expandedNodeColor = "blue";

    // This is the start arc in the automaton (from the epsilon state to the first state
    // with outgoing transitions.
    final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());

    // A queue of transitions to consider for the next level.
    final List<FST.Arc<T>> thisLevelQueue = new ArrayList<FST.Arc<T>>();

    // A queue of transitions to consider when processing the next level.
    final List<FST.Arc<T>> nextLevelQueue = new ArrayList<FST.Arc<T>>();
    nextLevelQueue.add(startArc);

    // A list of states on the same level (for ranking).
    final List<Integer> sameLevelStates = new ArrayList<Integer>();

    // A bitset of already seen states (target offset).
    final BitSet seen = new BitSet();
    seen.set(startArc.target);

    // Shape for states.
    final String stateShape = "circle";

    // Emit DOT prologue.
    out.write("digraph FST {\n");
    out.write("  rankdir = LR; splines=true; concentrate=true; ordering=out; ranksep=2.5; \n");

    if (!labelStates) {
      out.write("  node [shape=circle, width=.2, height=.2, style=filled]\n");
    }

    emitDotState(out, "initial", "point", "white", "");
    emitDotState(
        out,
        Integer.toString(startArc.target),
        stateShape,
        fst.isExpandedTarget(startArc) ? expandedNodeColor : null,
        "");
    out.write("  initial -> " + startArc.target + "\n");

    final T NO_OUTPUT = fst.outputs.getNoOutput();
    int level = 0;

    while (!nextLevelQueue.isEmpty()) {
      // we could double buffer here, but it doesn't matter probably.
      thisLevelQueue.addAll(nextLevelQueue);
      nextLevelQueue.clear();

      level++;
      out.write("\n  // Transitions and states at level: " + level + "\n");
      while (!thisLevelQueue.isEmpty()) {
        final FST.Arc<T> arc = thisLevelQueue.remove(thisLevelQueue.size() - 1);

        if (fst.targetHasArcs(arc)) {
          // scan all arcs
          final int node = arc.target;
          fst.readFirstTargetArc(arc, arc);

          while (true) {
            // Emit the unseen state and add it to the queue for the next level.
            if (arc.target >= 0 && !seen.get(arc.target)) {
              final boolean isExpanded = fst.isExpandedTarget(arc);
              emitDotState(
                  out,
                  Integer.toString(arc.target),
                  stateShape,
                  isExpanded ? expandedNodeColor : null,
                  labelStates ? Integer.toString(arc.target) : "");
              seen.set(arc.target);
              nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
              sameLevelStates.add(arc.target);
            }

            String outs;
            if (arc.output != NO_OUTPUT) {
              outs = "/" + fst.outputs.outputToString(arc.output);
            } else {
              outs = "";
            }

            final String cl;
            if (arc.label == FST.END_LABEL) {
              cl = "~";
            } else {
              cl = printableLabel(arc.label);
            }

            out.write("  " + node + " -> " + arc.target + " [label=\"" + cl + outs + "\"]\n");

            // Break the loop if we're on the last arc of this state.
            if (arc.isLast()) {
              break;
            }
            fst.readNextArc(arc);
          }
        }
      }

      // Emit state ranking information.
      if (sameRank && sameLevelStates.size() > 1) {
        out.write("  {rank=same; ");
        for (int state : sameLevelStates) {
          out.write(state + "; ");
        }
        out.write(" }\n");
      }
      sameLevelStates.clear();
    }

    // Emit terminating state (always there anyway).
    out.write("  -1 [style=filled, color=black, shape=circle, label=\"\"]\n\n");
    out.write("  {rank=sink; -1 }\n");

    out.write("}\n");
    out.flush();
  }