Example #1
0
  /**
   * Consumes consecutive extlang subtags from the iterator and appends them to {@code extlangs}.
   *
   * <p>Scanning stops at the first subtag rejected by {@code isExtlang}, when the iterator is
   * exhausted, or as soon as {@code extlangs} holds exactly three entries. After every accepted
   * subtag, {@code sts.parseLength} is moved to the end of that subtag.
   *
   * @param itr token iterator positioned at the candidate subtag
   * @param sts parse status; nothing is parsed if it already reports an error
   * @return {@code true} if at least one extlang subtag was consumed
   */
  private boolean parseExtlangs(StringTokenIterator itr, ParseStatus sts) {
    final boolean nothingToParse = itr.isDone() || sts.isError();
    if (nothingToParse) {
      return false;
    }

    boolean matchedAny = false;

    // The guard above guarantees a current token exists for the first pass.
    do {
      final String candidate = itr.current();
      if (!isExtlang(candidate)) {
        break;
      }
      if (extlangs.isEmpty()) {
        // Swap the empty placeholder for a real, writable list on first use.
        extlangs = new ArrayList<>(3);
      }
      extlangs.add(candidate);
      sts.parseLength = itr.currentEnd();
      itr.next();
      matchedAny = true;
      // Maximum 3 extlangs
    } while (extlangs.size() != 3 && !itr.isDone());

    return matchedAny;
  }
Example #2
0
  /**
   * Consumes a single region subtag, if the current token is one.
   *
   * <p>On a match the token is stored in {@code region}, {@code sts.parseLength} is moved to the
   * end of the token, and the iterator advances by one token.
   *
   * @param itr token iterator positioned at the candidate subtag
   * @param sts parse status; nothing is parsed if it already reports an error
   * @return {@code true} if a region subtag was consumed
   */
  private boolean parseRegion(StringTokenIterator itr, ParseStatus sts) {
    final boolean nothingToParse = itr.isDone() || sts.isError();
    if (nothingToParse) {
      return false;
    }

    final String candidate = itr.current();
    if (!isRegion(candidate)) {
      return false;
    }

    region = candidate;
    sts.parseLength = itr.currentEnd();
    itr.next();
    return true;
  }
Example #3
0
  /**
   * Consumes zero or more extensions, each a singleton followed by one or more extension subtags.
   *
   * <p>Every complete extension is appended to {@code extensions} as the singleton and its
   * subtags joined by {@code SEP}. If a singleton is not followed by any valid subtag, the error
   * index and message are recorded in {@code sts} and parsing stops; extensions accepted before
   * that point remain stored.
   *
   * @param itr token iterator positioned at the candidate singleton
   * @param sts parse status; nothing is parsed if it already reports an error
   * @return {@code true} if at least one complete extension was consumed
   */
  private boolean parseExtensions(StringTokenIterator itr, ParseStatus sts) {
    final boolean nothingToParse = itr.isDone() || sts.isError();
    if (nothingToParse) {
      return false;
    }

    boolean matchedAny = false;

    while (!itr.isDone()) {
      final String singleton = itr.current();
      if (!isExtensionSingleton(singleton)) {
        break;
      }

      final int singletonStart = itr.currentStart();
      final StringBuilder extension = new StringBuilder(singleton);

      // Collect every subtag that directly follows the singleton.
      for (itr.next(); !itr.isDone(); itr.next()) {
        final String subtag = itr.current();
        if (!isExtensionSubtag(subtag)) {
          break;
        }
        extension.append(SEP).append(subtag);
        sts.parseLength = itr.currentEnd();
      }

      // parseLength only moves past the singleton when a subtag was accepted above.
      if (sts.parseLength <= singletonStart) {
        sts.errorIndex = singletonStart;
        sts.errorMsg = "Incomplete extension '" + singleton + "'";
        break;
      }

      if (extensions.isEmpty()) {
        extensions = new ArrayList<>(4);
      }
      extensions.add(extension.toString());
      matchedAny = true;
    }

    return matchedAny;
  }
Example #4
0
  /*
   * BNF in RFC5646
   *
   * Language-Tag  = langtag             ; normal language tags
   *               / privateuse          ; private use tag
   *               / grandfathered       ; grandfathered tags
   *
   *
   * langtag       = language
   *                 ["-" script]
   *                 ["-" region]
   *                 *("-" variant)
   *                 *("-" extension)
   *                 ["-" privateuse]
   *
   * language      = 2*3ALPHA            ; shortest ISO 639 code
   *                 ["-" extlang]       ; sometimes followed by
   *                                     ; extended language subtags
   *               / 4ALPHA              ; or reserved for future use
   *               / 5*8ALPHA            ; or registered language subtag
   *
   * extlang       = 3ALPHA              ; selected ISO 639 codes
   *                 *2("-" 3ALPHA)      ; permanently reserved
   *
   * script        = 4ALPHA              ; ISO 15924 code
   *
   * region        = 2ALPHA              ; ISO 3166-1 code
   *               / 3DIGIT              ; UN M.49 code
   *
   * variant       = 5*8alphanum         ; registered variants
   *               / (DIGIT 3alphanum)
   *
   * extension     = singleton 1*("-" (2*8alphanum))
   *
   *                                     ; Single alphanumerics
   *                                     ; "x" reserved for private use
   * singleton     = DIGIT               ; 0 - 9
   *               / %x41-57             ; A - W
   *               / %x59-5A             ; Y - Z
   *               / %x61-77             ; a - w
   *               / %x79-7A             ; y - z
   *
   * privateuse    = "x" 1*("-" (1*8alphanum))
   *
   */
  /**
   * Parses a language tag string into a {@code LanguageTag}, following the grammar above.
   *
   * <p>If the lower-cased input is a key of {@code GRANDFATHERED}, its preferred mapping is
   * tokenized instead of the input itself, so positions recorded in {@code sts} then refer to the
   * mapped string. Parsing is lenient: a tag is always returned, and any subtag that could not be
   * consumed is reported through {@code sts}.
   *
   * @param languageTag the tag text to parse
   * @param sts status object that receives the parse length and any error; it is reset before
   *     use, and a throwaway instance is used when {@code null} is passed
   * @return the tag populated with every subtag that was successfully parsed
   */
  public static LanguageTag parse(String languageTag, ParseStatus sts) {
    if (sts != null) {
      sts.reset();
    } else {
      sts = new ParseStatus();
    }

    // Check if the tag is grandfathered; if so, tokenize its preferred mapping instead.
    final String[] gfmap = GRANDFATHERED.get(LocaleUtils.toLowerString(languageTag));
    final String textToTokenize = (gfmap == null) ? languageTag : gfmap[1];
    final StringTokenIterator itr = new StringTokenIterator(textToTokenize, SEP);

    final LanguageTag tag = new LanguageTag();

    // langtag must start with either language or privateuse
    if (tag.parseLanguage(itr, sts)) {
      tag.parseExtlangs(itr, sts);
      tag.parseScript(itr, sts);
      tag.parseRegion(itr, sts);
      tag.parseVariants(itr, sts);
      tag.parseExtensions(itr, sts);
    }
    tag.parsePrivateuse(itr, sts);

    if (itr.isDone() || sts.isError()) {
      return tag;
    }

    // A token is left over that no sub-parser accepted: report where and what it is.
    final String leftover = itr.current();
    sts.errorIndex = itr.currentStart();
    sts.errorMsg = leftover.isEmpty() ? "Empty subtag" : "Invalid subtag: " + leftover;
    return tag;
  }
  /**
   * Parses the content of a Javadoc comment and wraps the outcome in a {@code ParseStatus}.
   *
   * <p>On success the status carries the converted {@code DetailNode} tree. If parsing is
   * cancelled by a {@code ParseCancellationException}, the status carries a parse error message
   * instead.
   *
   * @param javadocCommentAst DetailAST of Javadoc comment
   * @return status holding either the DetailNode tree or the parse error message
   */
  public ParseStatus parseJavadocAsDetailNode(DetailAST javadocCommentAst) {
    blockCommentLineNumber = javadocCommentAst.getLineNo();
    final String commentText = JavadocUtils.getJavadocCommentContent(javadocCommentAst);

    // A fresh listener per call keeps one check instance reusable across
    // multiple files without state leaking from one parse into the next.
    errorListener = new DescriptiveErrorListener();

    // Reported line numbers must be relative to the file rather than to the
    // Javadoc comment, so shift them by the line on which the comment begins.
    errorListener.setOffset(javadocCommentAst.getLineNo() - 1);

    final ParseStatus status = new ParseStatus();

    try {
      final ParseTree parsed = parseJavadocAsParseTree(commentText);
      status.setTree(convertParseTreeToDetailNode(parsed));
    } catch (ParseCancellationException ex) {
      // On a syntax error the listener records the message and the parser
      // throws this runtime exception to abandon the current Javadoc comment.
      final ParseErrorMessage recorded = errorListener.getErrorMessage();

      // There are cases when antlr error listener does not handle syntax error;
      // fall back to a generic message located at the comment itself.
      final ParseErrorMessage reported =
          recorded != null
              ? recorded
              : new ParseErrorMessage(
                  javadocCommentAst.getLineNo(),
                  MSG_KEY_UNRECOGNIZED_ANTLR_ERROR,
                  javadocCommentAst.getColumnNo(),
                  ex.getMessage());

      status.setParseErrorMessage(reported);
    }

    return status;
  }
Example #6
0
 /**
  * Builds a {@code ParseStatus} describing a syntax error at the given token position.
  *
  * <p>When {@code tokenNumber} indexes an existing token, the message names that token. When it
  * lies at or beyond the end of {@code tokens}, the message reports an unexpected end of input
  * after the last available token. If {@code tokens} is empty there is no token to name, so the
  * message carries only the context text instead of failing with an
  * {@code IndexOutOfBoundsException}.
  *
  * @param tokenNumber index of the offending token; must not be {@code null}
  * @param tokens the tokens being parsed; may be empty
  * @param err description of the error, appended to the message
  * @return a status with column number {@code -1}, an empty point set and the error message
  */
 public static ParseStatus syntaxError(Integer tokenNumber, List<Token> tokens, String err) {
   ParseStatus status = new ParseStatus();
   status.columnNumber = -1;
   status.tokenNumber = tokenNumber;
   status.pointSet = new EmptyPointSet();
   if (tokenNumber < tokens.size()) {
     Token token = tokens.get(tokenNumber);
     status.errMsg =
         "Syntax error with token ("
             + token.getText()
             + ") near column "
             + token.getStart()
             + ": "
             + err;
   } else if (tokens.isEmpty()) {
     // No token exists to anchor the message to; report the context alone.
     status.errMsg = "Unexpected end of input: context - " + err;
   } else {
     // Anchor to the last token that exists, even if tokenNumber overshoots the list.
     Token token = tokens.get(tokens.size() - 1);
     status.errMsg =
         "Unexpected end of input after token ("
             + token.getText()
             + ") near column "
             + token.getStart()
             + ": context - "
             + err;
   }
   return status;
 }
Example #7
0
  /**
   * Consumes consecutive variant subtags from the iterator and appends them to {@code variants}.
   *
   * <p>Scanning stops at the first subtag rejected by {@code isVariant} or when the iterator is
   * exhausted. After every accepted subtag, {@code sts.parseLength} is moved to the end of that
   * subtag.
   *
   * @param itr token iterator positioned at the candidate subtag
   * @param sts parse status; nothing is parsed if it already reports an error
   * @return {@code true} if at least one variant subtag was consumed
   */
  private boolean parseVariants(StringTokenIterator itr, ParseStatus sts) {
    final boolean nothingToParse = itr.isDone() || sts.isError();
    if (nothingToParse) {
      return false;
    }

    int accepted = 0;

    while (!itr.isDone()) {
      final String candidate = itr.current();
      if (!isVariant(candidate)) {
        break;
      }
      if (variants.isEmpty()) {
        // Swap the empty placeholder for a real, writable list on first use.
        variants = new ArrayList<>(3);
      }
      variants.add(candidate);
      sts.parseLength = itr.currentEnd();
      itr.next();
      accepted++;
    }

    return accepted > 0;
  }
Example #8
0
  /**
   * Consumes a private-use sequence: the private-use prefix followed by one or more subtags.
   *
   * <p>On success the prefix and its subtags, joined by {@code SEP}, are stored in
   * {@code privateuse}. If the prefix is not followed by any valid subtag, the error index and
   * message are recorded in {@code sts} and nothing is stored.
   *
   * @param itr token iterator positioned at the candidate prefix
   * @param sts parse status; nothing is parsed if it already reports an error
   * @return {@code true} if a complete private-use sequence was consumed
   */
  private boolean parsePrivateuse(StringTokenIterator itr, ParseStatus sts) {
    final boolean nothingToParse = itr.isDone() || sts.isError();
    if (nothingToParse) {
      return false;
    }

    final String prefix = itr.current();
    if (!isPrivateusePrefix(prefix)) {
      return false;
    }

    final int prefixStart = itr.currentStart();
    final StringBuilder collected = new StringBuilder(prefix);

    // Collect every subtag that directly follows the prefix.
    for (itr.next(); !itr.isDone(); itr.next()) {
      final String subtag = itr.current();
      if (!isPrivateuseSubtag(subtag)) {
        break;
      }
      collected.append(SEP).append(subtag);
      sts.parseLength = itr.currentEnd();
    }

    if (sts.parseLength <= prefixStart) {
      // need at least 1 private subtag
      sts.errorIndex = prefixStart;
      sts.errorMsg = "Incomplete privateuse";
      return false;
    }

    privateuse = collected.toString();
    return true;
  }
Example #9
0
  /**
   * Map function for distributed parsing of the CSV files.
   *
   * <p>In first phase it calculates the min, max, means, encodings and other statistics about the
   * dataset, determines the number of columns.
   *
   * <p>The second pass then encodes the parsed dataset to the result key, splitting it into equal
   * sized chunks.
   *
   * <p>Any {@link Exception} raised while processing is not propagated: its stack trace is printed
   * and its message is stored in {@code _error}.
   *
   * @param key the key of the value to parse; when its first byte equals
   *     {@code Key.ARRAYLET_CHUNK} it is handled as one chunk of a {@code ValueArray}
   */
  @Override
  public void map(Key key) {
    try {
      // aryKey stays null unless the key is an arraylet chunk.
      Key aryKey = null;
      boolean arraylet = key._kb[0] == Key.ARRAYLET_CHUNK;
      boolean skipFirstLine = _skipFirstLine;
      if (arraylet) {
        aryKey = ValueArray.getArrayKey(key);
        _chunkId = ValueArray.getChunkIndex(key);
        // Every chunk other than chunk 0 skips its first line regardless of _skipFirstLine.
        // NOTE(review): presumably because a non-initial chunk may begin mid-line -- confirm.
        skipFirstLine = skipFirstLine || (ValueArray.getChunkIndex(key) != 0);
      }
      switch (_phase) {
        case ONE:
          assert (_ncolumns != 0);
          // initialize the column statistics
          phaseOneInitialize();
          // perform the parse
          CsvParser p = new CsvParser(aryKey, _ncolumns, _sep, _decSep, this, skipFirstLine);
          p.parse(key);
          if (arraylet) {
            // Record this chunk's row count (_myrows) in its own slot of _nrows.
            long idx = ValueArray.getChunkIndex(key);
            int idx2 = (int) idx;
            // The chunk index must fit in an int to be usable as an array index.
            assert idx2 == idx;
            // The slot is expected to be untouched (zero) before it is written here.
            assert (_nrows[idx2] == 0)
                : idx
                    + ": "
                    + Arrays.toString(_nrows)
                    + " ("
                    + _nrows[idx2]
                    + " -- "
                    + _myrows
                    + ")";
            _nrows[idx2] = _myrows;
          }
          break;
        case TWO:
          assert (_ncolumns != 0);
          // initialize statistics - invalid rows, sigma and row size
          phaseTwoInitialize();
          // calculate the first row and the number of rows to parse
          int firstRow = 0;
          // For a non-arraylet key the row range is [0, _myrows); _myrows is then reset.
          int lastRow = _myrows;
          _myrows = 0;
          if (arraylet) {
            // NOTE(review): _nrows is read here as cumulative end offsets per chunk, while phase
            // ONE stores per-chunk counts; presumably it is converted between phases -- confirm.
            long origChunkIdx = ValueArray.getChunkIndex(key);
            // The cast binds tighter than the subtraction: ((int) origChunkIdx) - 1.
            firstRow = (origChunkIdx == 0) ? 0 : _nrows[(int) origChunkIdx - 1];
            lastRow = _nrows[(int) origChunkIdx];
          }
          int rowsToParse = lastRow - firstRow;
          // create the output streams
          _outputStreams2 = createRecords(firstRow, rowsToParse);
          assert (_outputStreams2.length > 0);
          _ab = _outputStreams2[0].initialize();
          // perform the second parse pass
          CsvParser p2 = new CsvParser(aryKey, _ncolumns, _sep, _decSep, this, skipFirstLine);
          p2.parse(key);
          // store the last stream if not stored during the parse
          if (_ab != null) _outputStreams2[_outputIdx].store();
          break;
        default:
          // Unknown phase: only detected when assertions are enabled; otherwise execution
          // continues to the status update below.
          assert (false);
      }

      // Report progress for the result key, passing the length of the value just processed.
      ParseStatus.update(_resultKey, DKV.get(key).length(), _phase);
    } catch (Exception e) {
      // Failures are recorded rather than rethrown; only the message is kept in _error.
      e.printStackTrace();
      _error = e.getMessage();
    }
  }
Example #10
0
 /**
  * Creates a fresh {@code ParseStatus} whose token number is set to the given position.
  *
  * @param pos the token position to record
  * @return a new status with {@code tokenNumber} equal to {@code pos}
  */
 public static ParseStatus nextPosition(int pos) {
   final ParseStatus advanced = new ParseStatus();
   advanced.tokenNumber = pos;
   return advanced;
 }