Example #1
0
  /**
   * Checks and applies lexical precedence rules on a lexer automaton, and
   * reports findings to the human user during the process.
   *
   * <p>The automaton is minimized first. Whenever the accepting states of one
   * acceptation are strictly contained in those of another acceptation sharing
   * a state with it, a priority is registered through {@code addPriority}
   * (implicit inclusion rule) and the inclusion is traced. Afterwards, the
   * acceptations of every accepting state are compared pairwise with
   * {@code hasPriority} and a single one is retained for that state.
   *
   * <p>An error built by {@code SemanticException.genericError} is thrown when
   * two tokens are found to have exactly the same accepting states, or when
   * two tokens share a state and neither has priority over the other.
   *
   * @param automaton
   *            is the automaton to check. In order to have the explicit
   *            priorities applied, it is required that the automaton is
   *            tagged with the acceptation of the LexerExpression.
   * @param trace
   *            receives one verbose line per detected inclusion.
   * @param strictness
   *            is not consulted by this method.
   * @return a new automaton where only the right acceptation tags remain.
   */
  public Automaton checkAndApplyLexerPrecedence(
      Automaton automaton, Trace trace, Strictness strictness) {

    Automaton minimized = automaton.minimal();
    Map<State, String> shortestWords = minimized.collectShortestWords();
    Map<Acceptation, Set<State>> statesByAcceptation = minimized.collectAcceptationStates();

    // For each acceptation, every acceptation found on at least one of its
    // states (itself included).
    Map<Acceptation, Set<Acceptation>> conflicts = new HashMap<Acceptation, Set<Acceptation>>();

    // For each acceptation, the acceptations it supersedes.
    Map<Acceptation, Set<Acceptation>> priorities = new HashMap<Acceptation, Set<Acceptation>>();

    // Phase 1: derive the implicit priorities from state-set inclusion.
    for (Acceptation current : minimized.getAcceptations()) {
      if (current == Acceptation.ACCEPT) {
        continue;
      }

      // FIXME: empty LexerExpressions are not detected here since
      // their acceptation tag is not in the automaton.

      // Gather everything that is accepted on a state where 'current' is too.
      Set<State> currentStates = statesByAcceptation.get(current);
      Set<Acceptation> sharing = new TreeSet<Acceptation>();
      for (State state : currentStates) {
        sharing.addAll(state.getAcceptations());
      }
      conflicts.put(current, sharing);

      for (Acceptation other : sharing) {
        if (other == Acceptation.ACCEPT || other == current) {
          continue;
        }

        Set<State> otherStates = statesByAcceptation.get(other);

        if (otherStates.equals(currentStates)) {
          // 'other' is registered in 'conflicts' only once its own turn in
          // the outer loop has come; the error is raised when it has not.
          if (conflicts.containsKey(other)) {
            continue;
          }
          throw SemanticException.genericError(
              "The " + current.getName() + " and " + other.getName() + " tokens are equivalent.");
        }

        if (!otherStates.containsAll(currentStates)) {
          continue;
        }

        // 'current' is strictly included in 'other'.
        addPriority(priorities, current, other);

        // Pick a state accepted by 'other' only, to illustrate the
        // difference. Strict inclusion guarantees that one exists.
        State divergence = null;
        for (State state : otherStates) {
          if (currentStates.contains(state)) {
            continue;
          }
          divergence = state;
          break;
        }

        String inclusionNote =
            "    The "
                + current.getName()
                + " token is included in the "
                + other.getName()
                + " token. (Example of divergence: '"
                + shortestWords.get(divergence)
                + "'.)";
        trace.verboseln(inclusionNote);
      }
    }

    // Phase 2: keep a single acceptation per accepting state, failing when
    // two of them cannot be ordered.
    Map<State, Acceptation> retained = new HashMap<State, Acceptation>();
    for (State state : minimized.getStates()) {
      if (!state.getAcceptations().isEmpty()) {
        Acceptation winner = state.getAcceptations().first();
        for (Acceptation rival : state.getAcceptations()) {
          if (winner == rival || hasPriority(priorities, winner, rival)) {
            // The current winner stands.
            continue;
          }
          if (!hasPriority(priorities, rival, winner)) {
            throw SemanticException.genericError(
                "The "
                    + winner.getName()
                    + " token and the "
                    + rival.getName()
                    + " token conflict on the string '"
                    + shortestWords.get(state)
                    + "'. You should specify a precedence between them.");
          }
          winner = rival;
        }
        retained.put(state, winner);
      }
    }

    // Ask for a new automaton carrying only the retained acceptations.
    return minimized.resetAcceptations(retained);
  }
Example #2
0
  /**
   * Builds the lexer automaton from the global anonymous context, stores it in
   * the lexer and validates the lexer expressions.
   *
   * <p>The following problems are looked for, in this order: lexer expressions
   * without a saved automaton ("useless"), problems raised while fetching each
   * expression's automaton, tokens whose acceptation is absent from the big
   * automaton, and tokens whose own automaton has a lookahead transition
   * leaving its start state (reported as matching the empty string).
   *
   * @param trace
   *            receives a verbose line for the problems that are tolerated
   *            when {@code strictness} is not {@code Strictness.STRICT}.
   * @param strictness
   *            when equal to {@code Strictness.STRICT}, useless expressions
   *            and tokens that match nothing raise an error instead of being
   *            traced.
   */
  public void compileLexer(Trace trace, Strictness strictness) {

    if (this.globalAnonymousContext == null) {
      throw new InternalException("not implemented");
    }

    Automaton longestMatch =
        this.globalAnonymousContext.computeAutomaton().minimal().longest().minimal();
    Automaton automaton = checkAndApplyLexerPrecedence(longestMatch, trace, strictness).minimal();
    this.lexer.setAutomaton(automaton.withMarkers().minimal());

    for (Context namedContext : this.namedContexts) {
      namedContext.computeAutomaton();
    }

    boolean strict = strictness == Strictness.STRICT;

    // Look for useless LexerExpression
    for (LexerExpression expression : this.lexer.getExpressions()) {
      // If there is no automaton saved it means that the LexerExpression
      // was not used to build the big automaton.
      if (expression.getSavedAutomaton() != null) {
        continue;
      }
      String problem = "The " + expression.getExpressionName() + " expression is useless.";
      if (strict) {
        throw SemanticException.genericError(problem);
      }
      trace.verboseln("    " + problem);
    }

    for (LexerExpression expression : this.lexer.getExpressions()) {
      // Note: getting the automaton forces the validation of the semantic
      // validity of the expression (e.g. circularity). The result itself is
      // not needed here.
      expression.getAutomaton();
    }

    for (LexerExpression token :
        this.globalAnonymousContext.getLexerExpressionTokensAndIgnored()) {
      // Note: The big automaton has to be minimal (thus with the unused
      // acceptations removed)
      boolean matchesSomething = automaton.getAcceptations().contains(token.getAcceptation());
      if (!matchesSomething) {
        String problem = "The " + token.getExpressionName() + " token does not match anything.";
        if (strict) {
          throw SemanticException.genericError(problem);
        }
        trace.verboseln("    " + problem);
      }

      Automaton tokenAutomaton = token.getAutomaton();
      for (RichSymbol symbol : tokenAutomaton.getStartState().getTransitions().keySet()) {
        if (!symbol.isLookahead()) {
          continue;
        }
        // We have a lookahead transition from the start state.
        // Note: this works since the result of getAutomaton() is
        // minimized.
        throw SemanticException.genericError(
            "The " + token.getExpressionName() + " token matches the empty string.");
      }
    }
  }