예제 #1
0
  /**
   * Static service to get a term from its string representation, providing a specific operator
   * manager.
   *
   * <p>The string must contain exactly one term: a first token that is already EOF, a {@code null}
   * result from the expression parser, or any token left over after the term are all rejected.
   * {@code resolveTerm()} is invoked on the term before it is returned.
   *
   * @param st the text to parse
   * @param op the operator manager passed to the {@code Parser} that is created for this call
   * @return the parsed term
   * @throws InvalidTermException if the text does not consist of exactly one term, or if an I/O
   *     error occurs while tokenizing (the {@link IOException} is attached as the cause)
   */
  public static Term parseSingleTerm(String st, OperatorManager op) throws InvalidTermException {
    try {
      Parser p = new Parser(op, st);

      // Peek at the first token only to reject empty input, then hand it back to the tokenizer.
      Token t = p.tokenizer.readToken();
      if (t.isEOF()) {
        throw new InvalidTermException("Term starts with EOF");
      }
      p.tokenizer.unreadToken(t);

      Term term = p.expr(false);
      if (term == null) {
        throw new InvalidTermException("Term is null");
      }

      // Nothing but EOF may follow the term.
      if (!p.tokenizer.readToken().isEOF()) {
        throw new InvalidTermException("The entire string could not be read as one term");
      }
      term.resolveTerm();
      return term;
    } catch (IOException ex) {
      // Keep the original failure reachable through getCause() instead of silently dropping it.
      InvalidTermException wrapped = new InvalidTermException("An I/O error occured");
      wrapped.initCause(ex);
      throw wrapped;
    }
  }
예제 #2
0
  /**
   * Parses and returns a valid left side of an expression.
   *
   * <p>When the next token is an operator it is tried as a prefix operator ({@code fx} and/or
   * {@code fy}) applied to an operand parsed by {@code exprA}; a {@code "-"} immediately followed
   * by a number token is turned into a negative number instead. When the token is not an operator,
   * or no prefix form produced an operand, the token is pushed back and the left side is parsed as
   * an {@code expr0}.
   *
   * @param commaIsEndMarker used when the left side is part of an argument list of expressions
   * @param maxPriority prefix forms whose priority is higher than this are not attempted
   * @return a wrapper of: 1. the correctly structured term and 2. the priority of its root
   *     operator
   * @throws InvalidTermException propagated from the tokenizer or the nested expression parsers
   * @throws IOException propagated from the tokenizer or the nested expression parsers
   */
  private IdentifiedTerm parseLeftSide(boolean commaIsEndMarker, int maxPriority)
      throws InvalidTermException, IOException {
    Token first = tokenizer.readToken();

    // Not an operator at all: the left side has to be an expr0.
    if (!first.isOperator(commaIsEndMarker)) {
      tokenizer.unreadToken(first);
      return new IdentifiedTerm(0, expr0());
    }

    int fxPriority = opManager.opPrio(first.seq, "fx");
    int fyPriority = opManager.opPrio(first.seq, "fy");

    // A minus sign directly followed by a number token becomes a negative number.
    if (first.seq.equals("-")) {
      Token next = tokenizer.readToken();
      if (next.isNumber()) {
        return identifyTerm(0, Parser.createNumber("-" + next.seq), tokenStart);
      }
      tokenizer.unreadToken(next);
    }

    // Prefix forms whose priority exceeds the permitted maximum are disabled.
    if (fyPriority > maxPriority) {
      fyPriority = -1;
    }
    if (fxPriority > maxPriority) {
      fxPriority = -1;
    }

    boolean fxUsable = fxPriority >= OperatorManager.OP_LOW;
    boolean fyUsable = fyPriority >= OperatorManager.OP_LOW;
    // fx is attempted before fy only when it is usable and its priority is not lower than fy's.
    boolean fxGoesFirst = fxUsable && fxPriority >= fyPriority;

    if (fxGoesFirst) {
      // op(fx, n) exprA(n - 1)
      IdentifiedTerm operand = exprA(fxPriority - 1, commaIsEndMarker);
      if (operand != null) {
        return identifyTerm(fxPriority, new Struct(first.seq, operand.result), tokenStart);
      }
    }

    if (fyUsable) {
      // op(fy, n) exprA(n)
      IdentifiedTerm operand = exprA(fyPriority, commaIsEndMarker);
      if (operand != null) {
        return identifyTerm(fyPriority, new Struct(first.seq, operand.result), tokenStart);
      }
    }

    // fx was not attempted ahead of fy, so it gets its single attempt now.
    if (fxUsable && !fxGoesFirst) {
      // op(fx, n) exprA(n - 1)
      IdentifiedTerm operand = exprA(fxPriority - 1, commaIsEndMarker);
      if (operand != null) {
        return identifyTerm(fxPriority, new Struct(first.seq, operand.result), tokenStart);
      }
    }

    // No prefix form yielded an operand: give the token back and parse an expr0.
    tokenizer.unreadToken(first);
    return new IdentifiedTerm(0, expr0());
  }
예제 #3
0
  /**
   * Reads the next token from the input, skipping whitespace, {@code %} line comments and
   * {@code /* ... *}{@code /} block comments.
   *
   * <p>The raw tokens delivered by {@code super.nextToken()} are combined into: EOF, the
   * punctuation tokens {@code ( ) { } [ ] |}, the atom {@code !}, the operator {@code ,}, the end
   * token {@code .}, variables, atoms (optionally flagged as functor or operator by one token of
   * lookahead), quoted sequences, runs of graphic characters (returned as operators), integers
   * (including {@code 0b}, {@code 0o}, {@code 0x} and {@code 0'c} forms) and floats.
   *
   * @return the next token; an EOF token once the input is exhausted
   * @throws IOException propagated from the underlying stream tokenizer
   * @throws InvalidTermException if a block comment or a quoted sequence is still open at end of
   *     input, a quote contains an unescaped line break, a word starting with a digit is not a
   *     valid number, or the token cannot be classified at all
   */
  Token readNextToken() throws IOException, InvalidTermException {
    int typea;
    String svala;
    // A token stashed by the float lookahead below is delivered before reading anything new.
    if (pushBack2 != null) {
      typea = pushBack2.typea;
      svala = pushBack2.svala;
      pushBack2 = null;
    } else {
      typea = super.nextToken();
      svala = sval;
    }

    // skips whitespace
    // could be simplified if lookahead for blank space in functors wasn't necessary
    // and if '.' in numbers could be written with blank space
    while (Tokenizer.isWhite(typea)) {
      typea = super.nextToken();
      svala = sval;
    }

    // skips single line comments
    // could be simplified if % was not a legal character in quotes
    if (typea == '%') {
      do {
        typea = super.nextToken();
      } while (typea != '\r' && typea != '\n' && typea != TT_EOF);
      // pushes back \r, \n or EOF; line breaks are whitespace, so the recursive call skips them
      pushBack();
      return readNextToken();
    }

    // skips /* comments */
    if (typea == '/') {
      int typeb = super.nextToken();
      if (typeb == '*') {
        // 'cur' starts at a sentinel so the '*' of the opening "/*" can never pair with a
        // directly following '/' (i.e. "/*/" does not close the comment).
        int prev;
        int cur = 0;
        do {
          prev = cur;
          cur = super.nextToken();
          if (cur == TT_EOF) {
            // without this check the loop would spin forever on an unterminated comment
            throw new InvalidTermException(
                "Block comment is not terminated before end of input at line: " + lineno());
          }
        } while (prev != '*' || cur != '/');
        return readNextToken();
      } else {
        pushBack();
      }
    }

    // syntactic characters
    if (typea == TT_EOF) return new Token("", Tokenizer.EOF);
    if (typea == '(') return new Token("(", Tokenizer.LPAR);
    if (typea == ')') return new Token(")", Tokenizer.RPAR);
    if (typea == '{') return new Token("{", Tokenizer.LBRA2);
    if (typea == '}') return new Token("}", Tokenizer.RBRA2);
    if (typea == '[') return new Token("[", Tokenizer.LBRA);
    if (typea == ']') return new Token("]", Tokenizer.RBRA);
    if (typea == '|') return new Token("|", Tokenizer.BAR);

    if (typea == '!') return new Token("!", Tokenizer.ATOM);
    if (typea == ',') return new Token(",", Tokenizer.OPERATOR);

    // check that '.' as end token is followed by a layout character, see ISO Standard 6.4.8
    // endnote
    if (typea == '.') {
      int typeb = super.nextToken();
      if (Tokenizer.isWhite(typeb) || typeb == '%' || typeb == StreamTokenizer.TT_EOF) {
        return new Token(".", Tokenizer.END);
      } else {
        // not an end token: '.' is handled as a graphic character further below
        pushBack();
      }
    }

    boolean isNumber = false;

    // variable, atom or number
    if (typea == TT_WORD) {
      char firstChar = svala.charAt(0);
      if (Character.isUpperCase(firstChar) || '_' == firstChar) {
        // variable
        return new Token(svala, Tokenizer.VARIABLE);
      } else if (firstChar >= '0' && firstChar <= '9') {
        // all words starting with a digit 0 to 9 must be a number; handled at the end
        isNumber = true;
      } else {
        // otherwise, it must be an atom (or wrong)
        int typeb = super.nextToken(); // lookahead 1 to identify what type of atom
        pushBack(); // this does not skip whitespaces, only readNext does so.
        if (typeb == '(') return new Token(svala, Tokenizer.ATOM | Tokenizer.FUNCTOR);
        if (Tokenizer.isWhite(typeb)) return new Token(svala, Tokenizer.ATOM | Tokenizer.OPERATOR);
        return new Token(svala, Tokenizer.ATOM);
      }
    }

    // quotes
    if (typea == '\'' || typea == '\"' || typea == '`') {
      int qType = typea;
      StringBuilder quote = new StringBuilder();
      while (true) { // run through the entire quote and add its body to the quote buffer
        typea = super.nextToken();
        svala = sval;
        if (typea == TT_EOF) {
          // without this check the loop would append EOF markers forever on an unclosed quote
          throw new InvalidTermException(
              "Quoted sequence is not terminated before end of input at line: " + lineno());
        }
        // continuation escape sequence
        if (typea == '\\') {
          int typeb = super.nextToken();
          if (typeb == '\n') { // continuation escape sequence marker \\n
            continue;
          }
          if (typeb == '\r') {
            int typec = super.nextToken();
            if (typec == '\n') continue; // continuation escape sequence marker \\r\n
            pushBack();
            continue; // continuation escape sequence marker \\r
          }
          pushBack(); // pushback typeb
        }
        // double '' or "" or ``
        if (typea == qType) {
          int typeb = super.nextToken();
          if (typeb == qType) { // escaped '' or "" or ``
            quote.append((char) qType);
            continue;
          } else {
            pushBack();
            break; // otherwise, break on single quote
          }
        }
        if (typea == '\n' || typea == '\r') {
          throw new InvalidTermException(
              "line break in quote not allowed (unless they are escaped \\ first)");
        }

        if (svala != null) {
          quote.append(svala);
        } else {
          quote.append((char) typea);
        }
      }

      String quoteBody = quote.toString();

      // note: a back-quoted sequence is classified as SQ_SEQUENCE, like a single-quoted one
      qType = qType == '\'' ? SQ_SEQUENCE : qType == '\"' ? DQ_SEQUENCE : SQ_SEQUENCE;
      if (qType == SQ_SEQUENCE) {
        if (Parser.isAtom(quoteBody)) qType = ATOM;
        int typeb = super.nextToken(); // lookahead 1 to identify what type of quote
        pushBack(); // nextToken() does not skip whitespaces, only readNext does so.
        if (typeb == '(') return new Token(quoteBody, qType | FUNCTOR);
      }
      return new Token(quoteBody, qType);
    }

    // symbols
    if (Arrays.binarySearch(Tokenizer.GRAPHIC_CHARS, (char) typea) >= 0) {

      // the symbols are parsed individually by the super.nextToken(), so accumulate symbollist
      StringBuilder symbols = new StringBuilder();
      int typeb = typea;
      while (Arrays.binarySearch(Tokenizer.GRAPHIC_CHARS, (char) typeb) >= 0) {
        symbols.append((char) typeb);
        typeb = super.nextToken();
      }
      pushBack();

      // a leading + or - is not merged with a following number here; the whole run of graphic
      // characters is returned as one operator token
      return new Token(symbols.toString(), Tokenizer.OPERATOR);
    }

    // numbers: 1. integer, 2. float
    if (isNumber) {
      try { // the various parse calls throw NumberFormatException when parts of a number are
            // written illegally

        // 1.a. complex integers
        if (svala.startsWith("0")) {
          if (svala.indexOf('b') == 1)
            return new Token(
                "" + java.lang.Long.parseLong(svala.substring(2), 2),
                Tokenizer.INTEGER); // try binary
          if (svala.indexOf('o') == 1)
            return new Token(
                "" + java.lang.Long.parseLong(svala.substring(2), 8),
                Tokenizer.INTEGER); // try octal
          if (svala.indexOf('x') == 1)
            return new Token(
                "" + java.lang.Long.parseLong(svala.substring(2), 16),
                Tokenizer.INTEGER); // try hex
        }

        // lookahead 1
        int typeb = super.nextToken();
        String svalb = sval;

        // 1.b ordinary integers
        if (typeb != '.' && typeb != '\'') { // i.e. not float or character constant
          pushBack(); // lookahead 0
          return new Token("" + java.lang.Long.parseLong(svala), Tokenizer.INTEGER);
        }

        // 1.c character code constant
        if (typeb == '\'' && "0".equals(svala)) {
          int typec = super.nextToken(); // lookahead 2
          String svalc = sval;
          int intVal;
          if ((intVal = isCharacterCodeConstantToken(typec, svalc)) != -1)
            return new Token("" + intVal, Tokenizer.INTEGER);

          // this is an invalid character code constant int
          throw new InvalidTermException(
              "Character code constant starting with 0'<X> at line: "
                  + super.lineno()
                  + " cannot be recognized.");
        }

        // 2.a check that the value of the word prior to period is a valid long
        java.lang.Long.parseLong(svala); // throws an exception if not

        // 2.b first int is followed by a period
        if (typeb != '.')
          throw new InvalidTermException(
              "A number starting with 0-9 cannot be rcognized as an int and does not have a fraction '.' at line: "
                  + super.lineno());

        // lookahead 2
        int typec = super.nextToken();
        String svalc = sval;

        // 2.c check that the next token after '.' is a possible fraction
        if (typec != TT_WORD) { // if its not, the period is an End period
          pushBack(); // pushback 1 the token after period
          pushBack2 = new PushBack(typeb, svalb); // pushback 2 the period token
          return new Token(svala, INTEGER); // return what must be an int
        }

        // 2.d checking for exponent
        int exponent = svalc.indexOf("E");
        if (exponent == -1) exponent = svalc.indexOf("e");

        if (exponent >= 1) { // the float must have a valid exponent
          if (exponent == svalc.length() - 1) { // the exponent must be signed exponent
            int typeb2 = super.nextToken();
            if (typeb2 == '+' || typeb2 == '-') {
              int typec2 = super.nextToken();
              String svalc2 = sval;
              if (typec2 == TT_WORD) {
                // verify the remaining parts of the float and return
                java.lang.Long.parseLong(svalc.substring(0, exponent));
                java.lang.Integer.parseInt(svalc2);
                return new Token(svala + "." + svalc + (char) typeb2 + svalc2, Tokenizer.FLOAT);
              }
            }
          }
        }
        // 2.e verify lastly that ordinary floats and unsigned exponent floats are Java legal and
        // return them
        java.lang.Double.parseDouble(svala + "." + svalc);
        return new Token(svala + "." + svalc, Tokenizer.FLOAT);

      } catch (NumberFormatException e) {
        // TODO return more info on what was wrong with the number given
        throw new InvalidTermException(
            "A term starting with 0-9 cannot be parsed as a number at line: " + lineno());
      }
    }
    throw new InvalidTermException("Unknown Unicode character: " + typea + "  (" + svala + ")");
  }
예제 #4
0
  /**
   * Parses a primary expression, i.e. one that is not built from an operator application.
   *
   * <pre>
   * exprA(0) ::= integer
   *            | float
   *            | variable
   *            | atom
   *            | atom( exprA(1200) { , exprA(1200) }* )
   *            | '[' exprA(1200) { , exprA(1200) }* [ | exprA(1200) ] ']'
   *            | '{' [ exprA(1200) ] '}'
   *            | '(' exprA(1200) ')'
   * </pre>
   *
   * <p>Numbers, variables, atoms, compound terms and brace terms created directly in this method
   * are registered through {@code map(term, offset)} before being returned.
   *
   * @return the parsed term
   * @throws InvalidTermException if a closing parenthesis, bracket or brace is missing, a functor
   *     is not followed by a left parenthesis, or the first token cannot start an expression
   * @throws IOException propagated from the tokenizer
   */
  private Term expr0() throws InvalidTermException, IOException {
    Token t1 = tokenizer.readToken();

    // Captured right after reading t1 so it can still be used once further tokens were read
    // (presumably the start offset of t1 -- confirm against Tokenizer.tokenStart()).
    int tempStart = tokenizer.tokenStart();

    if (t1.isType(Tokenizer.INTEGER)) {
      Term i = Parser.parseInteger(t1.seq);
      map(i, tokenizer.tokenStart());
      return i; // todo moved method to Number
    }

    if (t1.isType(Tokenizer.FLOAT)) {
      Term f = Parser.parseFloat(t1.seq);
      map(f, tokenizer.tokenStart());
      return f; // todo moved method to Number
    }

    if (t1.isType(Tokenizer.VARIABLE)) {
      Term v = new Var(t1.seq);
      map(v, tokenizer.tokenStart());
      return v; // todo switched to use the internal check for "_" in Var(String)
    }

    if (t1.isType(Tokenizer.ATOM)
        || t1.isType(Tokenizer.SQ_SEQUENCE)
        || t1.isType(Tokenizer.DQ_SEQUENCE)) {
      // plain atom / quoted sequence without argument list
      if (!t1.isFunctor()) {
        Term f = new Struct(t1.seq);
        map(f, tokenizer.tokenStart());
        return f;
      }

      // compound term: functor '(' arguments ')'
      String functor = t1.seq;
      Token t2 = tokenizer.readToken(); // reading left par
      if (!t2.isType(Tokenizer.LPAR))
        throw new InvalidTermException(
            "Something identified as functor misses its first left parenthesis"); // todo check can
      // be skipped
      LinkedList<Term> a = expr0_arglist(); // reading arguments
      Token t3 = tokenizer.readToken();
      if (t3.isType(Tokenizer.RPAR)) { // reading right par
        Term c = new Struct(functor, a);
        map(c, tempStart);
        return c;
      }
      throw new InvalidTermException(
          "Missing right parenthesis '(" + a + "' -> here <-",
          tokenizer.offsetToRowColumn(getCurrentOffset())[0],
          tokenizer.offsetToRowColumn(getCurrentOffset())[1] - 1);
    }

    // parenthesised expression
    if (t1.isType(Tokenizer.LPAR)) {
      Term term = expr(false);
      if (tokenizer.readToken().isType(Tokenizer.RPAR)) return term;
      throw new InvalidTermException(
          "Missing right parenthesis '(" + term + "' -> here <-",
          tokenizer.offsetToRowColumn(getCurrentOffset())[0],
          tokenizer.offsetToRowColumn(getCurrentOffset())[1] - 1);
    }

    // list
    if (t1.isType(Tokenizer.LBRA)) {
      Token t2 = tokenizer.readToken();
      // NOTE(review): unlike the other terms built here, the empty list is not passed to map();
      // confirm whether that is intentional.
      if (t2.isType(Tokenizer.RBRA)) return new Struct();

      tokenizer.unreadToken(t2);
      Term term = expr0_list();
      if (tokenizer.readToken().isType(Tokenizer.RBRA)) return term;
      // the closing quote now follows the term, matching the other "Missing right ..." messages
      throw new InvalidTermException(
          "Missing right bracket '[" + term + "' -> here <-",
          tokenizer.offsetToRowColumn(getCurrentOffset())[0],
          tokenizer.offsetToRowColumn(getCurrentOffset())[1] - 1);
    }

    // brace term
    if (t1.isType(Tokenizer.LBRA2)) {
      Token t2 = tokenizer.readToken();
      if (t2.isType(Tokenizer.RBRA2)) {
        Term b = new Struct("{}");
        map(b, tempStart);
        return b;
      }
      tokenizer.unreadToken(t2);
      Term arg = expr(false);
      t2 = tokenizer.readToken();
      if (t2.isType(Tokenizer.RBRA2)) {
        Term b = new Struct("{}", arg);
        map(b, tempStart);
        return b;
      }
      throw new InvalidTermException(
          "Missing right braces '{" + arg + "' -> here <-",
          tokenizer.offsetToRowColumn(getCurrentOffset())[0],
          tokenizer.offsetToRowColumn(getCurrentOffset())[1] - 1);
    }

    // nothing above matched: t1 cannot start an expression
    throw new InvalidTermException(
        "Unexpected token '" + t1.seq + "'",
        tokenizer.offsetToRowColumn(getCurrentOffset())[0],
        tokenizer.offsetToRowColumn(getCurrentOffset())[1] - 1);
  }