Example #1
0
  /**
   * Searches the current input for the next match, appends it to <code>out</code> and returns the
   * {@link FaAction} that belongs to the match. Input is consumed until a match is found, {@link
   * #maxCopy} is reached or EOF is hit. What happens to non-matching input depends on the setting
   * made with {@link #setOnFailedMatch setOnFailedMatch()}:
   *
   * <dl>
   *   <dt>{@link #UNMATCHED_COPY}
   *   <dd>Up to {@link #maxCopy} non-matching characters are appended to <code>out</code> in front
   *       of the match. If <code>maxCopy</code> is reached before a match is found, <b>no matching
   *       text is returned</b>, only the non-matching characters, and the return value is <code>
   *       null</code>. At least one character is always delivered, even if <code>maxCopy</code>
   *       is &le;&nbsp;1. If a match is found before <code>maxCopy</code> is reached, the match is
   *       appended to <code>out</code>; call {@link #matchStart()} to find out where the match
   *       actually starts.
   *   <dt>{@link #UNMATCHED_DROP}
   *   <dd>Unmatched text is dropped (deleted), so the matching text is the only text appended to
   *       <code>out</code>.
   *   <dt>{@link #UNMATCHED_THROW}
   *   <dd>A {@link monq.jfa.NomatchException} is thrown. No text is appended to <code>out</code>
   *       and the offending text remains available in the {@link CharSource} serving as input to
   *       <code>this</code>.
   * </dl>
   *
   * <p><b>Hint:</b> This method is meant for simple tokenization of the input, where the returned
   * actions may serve as token types. If the returned action is to be applied to the match right
   * away, prefer one of the <code>read</code> or <code>filter</code> methods. Code that needs
   * <code>if</code> statements on the returned <code>FaAction</code> is most likely using the
   * wrong method.
   *
   * @param out the buffer to which copied non-matching characters and the match are appended
   * @return
   *     <dl>
   *       <dt>eofAction
   *       <dd>when EOF is hit for the first time and the <code>Dfa</code> operated has an action
   *           set for EOF which is not <code>null</code> (see {@link Nfa#compile Nfa.compile()}).
   *       <dt>{@link #EOF}
   *       <dd>if EOF is hit and <code>eofAction</code> was already delivered or is <code>null
   *           </code>. The output may contain non-matching input that was found just before EOF.
   *       <dt><code>null</code>
   *       <dd>if <code>UNMATCHED_COPY</code> is active and <code>maxCopy</code> non-matching
   *           characters were found before a match was encountered.
   *       <dt>an action
   *       <dd>found for a match.
   *     </dl>
   *
   * @throws java.io.IOException if reading the input fails, or, in the form of a {@link
   *     monq.jfa.NomatchException}, if nothing matches and unmatched input is neither copied nor
   *     dropped
   */
  public FaAction next(StringBuilder out) throws java.io.IOException {
    matchStart = out.length();
    FaAction action = dfa.match(in, out, smd);

    if (action == null) {
      // Nothing matches at the current position, so the first match has to be searched for.
      // Note: as long as dfa.match() does not deliver Dfa.EOF, at least one more character is
      // available in the input.
      if (onFailedMatch != UNMATCHED_COPY && onFailedMatch != UNMATCHED_DROP) {
        // every other setting treats unmatched input as a failure
        String context = lookahead();
        throw new NomatchException(
            "no matching regular expression " + "when looking at `" + context + "'");
      }

      if (onFailedMatch == UNMATCHED_DROP) {
        // throw away one character at a time until something matches (or EOF shows up)
        while (action == null) {
          in.read();
          action = dfa.match(in, out, smd);
        }
      } else {
        // move one character at a time to the output; the first character is copied
        // unconditionally, so even a maxCopy below 1 delivers one character
        int copied = 0;
        for (; ; ) {
          out.append((char) in.read());
          copied++;
          action = dfa.match(in, out, smd);
          if (action != null || copied >= maxCopy) {
            break;
          }
        }
        // the match, if any, begins behind the characters copied above
        matchStart += copied;
      }
    }

    // Anything but a first EOF with a non-null eofAction is handed out unchanged.
    if (action != EOF || dfa.eofAction == null || !eofArmed) {
      return action;
    }

    // EOF together with eofAction is treated as if a match had been found, but only once.
    eofArmed = false;
    return dfa.eofAction;
  }
Example #2
0
 /**
  * Fetches a bit of lookahead for use in exception messages. Whatever was read is pushed back
  * into the input afterwards.
  *
  * @return the first 27 characters followed by <code>"..."</code> if 30 characters could be read,
  *     otherwise all characters read followed by <code>"[EOF]"</code>; a fixed explanatory text
  *     if reading the input caused an <code>IOException</code>
  */
 private String lookahead() {
   // 30 characters are enough for a decent error message
   final int limit = 30;
   StringBuilder context = new StringBuilder(limit);
   try {
     int ch;
     while (context.length() < limit && (ch = in.read()) != -1) {
       context.append((char) ch);
     }
   } catch (java.io.IOException ex) {
     // best effort only: hand back what was read so far and report the problem as text
     in.pushBack(context, 0);
     return "IOException when trying to generate context info";
   }

   // The message is built before the push-back on purpose.
   // NOTE(review): pushBack() presumably may modify the buffer it is given; confirm before
   // reordering.
   String message =
       context.length() == limit ? context.substring(0, 27) + "..." : context.toString() + "[EOF]";
   in.pushBack(context, 0);
   return message;
 }
Example #3
0
 /**
  * Reads a single character directly from the input source and returns it unfiltered. Filtered
  * characters that are already available because of a previous {@link #read()} are left
  * untouched; they will be delivered by the next call to one of the <code>read()</code>
  * functions.
  *
  * @return the value delivered by the input source's <code>read()</code>
  * @throws java.io.IOException if the input source fails to read
  */
 public int skip() throws java.io.IOException {
   // bypass the filter completely and go straight to the underlying source
   final int raw = in.read();
   return raw;
 }