private boolean parseExtlangs(StringTokenIterator itr, ParseStatus sts) { if (itr.isDone() || sts.isError()) { return false; } boolean found = false; while (!itr.isDone()) { String s = itr.current(); if (!isExtlang(s)) { break; } found = true; if (extlangs.isEmpty()) { extlangs = new ArrayList<>(3); } extlangs.add(s); sts.parseLength = itr.currentEnd(); itr.next(); if (extlangs.size() == 3) { // Maximum 3 extlangs break; } } return found; }
/**
 * Consumes a single region subtag, if the current token is one.
 *
 * <p>On a match the token is stored in {@code region}, its end offset is
 * recorded in {@code sts.parseLength}, and the iterator is advanced.
 *
 * @param itr token iterator positioned at the candidate subtag
 * @param sts parse status; nothing is consumed if it already reports an error
 * @return {@code true} if a region subtag was consumed
 */
private boolean parseRegion(StringTokenIterator itr, ParseStatus sts) {
    if (itr.isDone() || sts.isError()) {
        return false;
    }

    final String candidate = itr.current();
    if (!isRegion(candidate)) {
        return false;
    }

    region = candidate;
    sts.parseLength = itr.currentEnd();
    itr.next();
    return true;
}
/**
 * Consumes zero or more extensions, each made of a singleton followed by
 * one or more extension subtags.
 *
 * <p>Every complete extension is appended to {@code extensions} as the
 * singleton and its subtags joined by {@code SEP}. If a singleton is not
 * followed by any extension subtag, {@code sts.errorIndex} and
 * {@code sts.errorMsg} are set and parsing of extensions stops.
 *
 * @param itr token iterator positioned at the candidate singleton
 * @param sts parse status; nothing is consumed if it already reports an error
 * @return {@code true} if at least one complete extension was consumed
 */
private boolean parseExtensions(StringTokenIterator itr, ParseStatus sts) {
    if (itr.isDone() || sts.isError()) {
        return false;
    }

    boolean anyExtension = false;

    while (!itr.isDone()) {
        final String singleton = itr.current();
        if (!isExtensionSingleton(singleton)) {
            break;
        }

        final int singletonStart = itr.currentStart();
        final StringBuilder extension = new StringBuilder(singleton);

        // Step past the singleton, then collect every subtag that belongs to it.
        for (itr.next(); !itr.isDone(); itr.next()) {
            final String subtag = itr.current();
            if (!isExtensionSubtag(subtag)) {
                break;
            }
            extension.append(SEP).append(subtag);
            sts.parseLength = itr.currentEnd();
        }

        // parseLength only moves past the singleton when a subtag was accepted.
        if (sts.parseLength <= singletonStart) {
            sts.errorIndex = singletonStart;
            sts.errorMsg = "Incomplete extension '" + singleton + "'";
            break;
        }

        if (extensions.isEmpty()) {
            extensions = new ArrayList<>(4);
        }
        extensions.add(extension.toString());
        anyExtension = true;
    }

    return anyExtension;
}
/* * BNF in RFC5646 * * Language-Tag = langtag ; normal language tags * / privateuse ; private use tag * / grandfathered ; grandfathered tags * * * langtag = language * ["-" script] * ["-" region] * *("-" variant) * *("-" extension) * ["-" privateuse] * * language = 2*3ALPHA ; shortest ISO 639 code * ["-" extlang] ; sometimes followed by * ; extended language subtags * / 4ALPHA ; or reserved for future use * / 5*8ALPHA ; or registered language subtag * * extlang = 3ALPHA ; selected ISO 639 codes * *2("-" 3ALPHA) ; permanently reserved * * script = 4ALPHA ; ISO 15924 code * * region = 2ALPHA ; ISO 3166-1 code * / 3DIGIT ; UN M.49 code * * variant = 5*8alphanum ; registered variants * / (DIGIT 3alphanum) * * extension = singleton 1*("-" (2*8alphanum)) * * ; Single alphanumerics * ; "x" reserved for private use * singleton = DIGIT ; 0 - 9 * / %x41-57 ; A - W * / %x59-5A ; Y - Z * / %x61-77 ; a - w * / %x79-7A ; y - z * * privateuse = "x" 1*("-" (1*8alphanum)) * */ public static LanguageTag parse(String languageTag, ParseStatus sts) { if (sts == null) { sts = new ParseStatus(); } else { sts.reset(); } StringTokenIterator itr; // Check if the tag is grandfathered String[] gfmap = GRANDFATHERED.get(LocaleUtils.toLowerString(languageTag)); if (gfmap != null) { // use preferred mapping itr = new StringTokenIterator(gfmap[1], SEP); } else { itr = new StringTokenIterator(languageTag, SEP); } LanguageTag tag = new LanguageTag(); // langtag must start with either language or privateuse if (tag.parseLanguage(itr, sts)) { tag.parseExtlangs(itr, sts); tag.parseScript(itr, sts); tag.parseRegion(itr, sts); tag.parseVariants(itr, sts); tag.parseExtensions(itr, sts); } tag.parsePrivateuse(itr, sts); if (!itr.isDone() && !sts.isError()) { String s = itr.current(); sts.errorIndex = itr.currentStart(); if (s.length() == 0) { sts.errorMsg = "Empty subtag"; } else { sts.errorMsg = "Invalid subtag: " + s; } } return tag; }
/** * Parses Javadoc comment as DetailNode tree. * * @param javadocCommentAst DetailAST of Javadoc comment * @return DetailNode tree of Javadoc comment */ public ParseStatus parseJavadocAsDetailNode(DetailAST javadocCommentAst) { blockCommentLineNumber = javadocCommentAst.getLineNo(); final String javadocComment = JavadocUtils.getJavadocCommentContent(javadocCommentAst); // Use a new error listener each time to be able to use // one check instance for multiple files to be checked // without getting side effects. errorListener = new DescriptiveErrorListener(); // Log messages should have line number in scope of file, // not in scope of Javadoc comment. // Offset is line number of beginning of Javadoc comment. errorListener.setOffset(javadocCommentAst.getLineNo() - 1); final ParseStatus result = new ParseStatus(); try { final ParseTree parseTree = parseJavadocAsParseTree(javadocComment); final DetailNode tree = convertParseTreeToDetailNode(parseTree); result.setTree(tree); } catch (ParseCancellationException ex) { // If syntax error occurs then message is printed by error listener // and parser throws this runtime exception to stop parsing. // Just stop processing current Javadoc comment. ParseErrorMessage parseErrorMessage = errorListener.getErrorMessage(); // There are cases when antlr error listener does not handle syntax error if (parseErrorMessage == null) { parseErrorMessage = new ParseErrorMessage( javadocCommentAst.getLineNo(), MSG_KEY_UNRECOGNIZED_ANTLR_ERROR, javadocCommentAst.getColumnNo(), ex.getMessage()); } result.setParseErrorMessage(parseErrorMessage); } return result; }
/**
 * Creates a {@code ParseStatus} describing a syntax error at a token position.
 *
 * <p>The status gets a column number of {@code -1}, the given token number,
 * an empty point set, and an error message chosen as follows:
 * <ul>
 *   <li>if {@code tokenNumber} indexes an existing token, the message names
 *       that token and its start column;</li>
 *   <li>if {@code tokenNumber} is at or past the end of {@code tokens}, the
 *       message reports an unexpected end of input after the last token;</li>
 *   <li>if {@code tokens} is empty, the message reports an unexpected end of
 *       input without naming a token.</li>
 * </ul>
 *
 * <p>The last two cases previously indexed {@code tokenNumber - 1} directly,
 * which threw {@code IndexOutOfBoundsException} for an empty list or for a
 * token number more than one past the end.
 *
 * @param tokenNumber index of the offending token; must not be {@code null}
 * @param tokens      the token list that was being parsed
 * @param err         description of the error or of the parsing context
 * @return a new status carrying the error message
 */
public static ParseStatus syntaxError(Integer tokenNumber, List<Token> tokens, String err) {
    ParseStatus status = new ParseStatus();
    status.columnNumber = -1;
    status.tokenNumber = tokenNumber;
    status.pointSet = new EmptyPointSet();

    if (tokenNumber < tokens.size()) {
        Token token = tokens.get(tokenNumber);
        status.errMsg = "Syntax error with token (" + token.getText()
                + ") near column " + token.getStart() + ": " + err;
    } else if (tokens.isEmpty()) {
        // No token exists to anchor the message to.
        status.errMsg = "Unexpected end of input: context - " + err;
    } else {
        // Anchor to the last real token, even if tokenNumber overshoots the
        // list by more than one.
        Token token = tokens.get(tokens.size() - 1);
        status.errMsg = "Unexpected end of input after token (" + token.getText()
                + ") near column " + token.getStart() + ": context - " + err;
    }
    return status;
}
/**
 * Consumes consecutive variant subtags from the iterator.
 *
 * <p>Each token accepted by {@code isVariant} is appended to
 * {@code variants} and its end offset is recorded in
 * {@code sts.parseLength}. Consumption stops at the first token that is not
 * a variant or when the iterator is exhausted.
 *
 * @param itr token iterator positioned at the candidate subtag
 * @param sts parse status; nothing is consumed if it already reports an error
 * @return {@code true} if at least one variant subtag was consumed
 */
private boolean parseVariants(StringTokenIterator itr, ParseStatus sts) {
    if (itr.isDone() || sts.isError()) {
        return false;
    }

    boolean sawVariant = false;

    // The iterator is advanced only after a token has been accepted.
    for (; !itr.isDone(); itr.next()) {
        final String candidate = itr.current();
        if (!isVariant(candidate)) {
            break;
        }

        sawVariant = true;
        if (variants.isEmpty()) {
            variants = new ArrayList<>(3);
        }
        variants.add(candidate);
        sts.parseLength = itr.currentEnd();
    }

    return sawVariant;
}
private boolean parsePrivateuse(StringTokenIterator itr, ParseStatus sts) { if (itr.isDone() || sts.isError()) { return false; } boolean found = false; String s = itr.current(); if (isPrivateusePrefix(s)) { int start = itr.currentStart(); StringBuilder sb = new StringBuilder(s); itr.next(); while (!itr.isDone()) { s = itr.current(); if (!isPrivateuseSubtag(s)) { break; } sb.append(SEP).append(s); sts.parseLength = itr.currentEnd(); itr.next(); } if (sts.parseLength <= start) { // need at least 1 private subtag sts.errorIndex = start; sts.errorMsg = "Incomplete privateuse"; } else { privateuse = sb.toString(); found = true; } } return found; }
/** * Map function for distributed parsing of the CSV files. * * <p>In first phase it calculates the min, max, means, encodings and other statistics about the * dataset, determines the number of columns. * * <p>The second pass then encodes the parsed dataset to the result key, splitting it into equal * sized chunks. */ @Override public void map(Key key) { try { Key aryKey = null; boolean arraylet = key._kb[0] == Key.ARRAYLET_CHUNK; boolean skipFirstLine = _skipFirstLine; if (arraylet) { aryKey = ValueArray.getArrayKey(key); _chunkId = ValueArray.getChunkIndex(key); skipFirstLine = skipFirstLine || (ValueArray.getChunkIndex(key) != 0); } switch (_phase) { case ONE: assert (_ncolumns != 0); // initialize the column statistics phaseOneInitialize(); // perform the parse CsvParser p = new CsvParser(aryKey, _ncolumns, _sep, _decSep, this, skipFirstLine); p.parse(key); if (arraylet) { long idx = ValueArray.getChunkIndex(key); int idx2 = (int) idx; assert idx2 == idx; assert (_nrows[idx2] == 0) : idx + ": " + Arrays.toString(_nrows) + " (" + _nrows[idx2] + " -- " + _myrows + ")"; _nrows[idx2] = _myrows; } break; case TWO: assert (_ncolumns != 0); // initialize statistics - invalid rows, sigma and row size phaseTwoInitialize(); // calculate the first row and the number of rows to parse int firstRow = 0; int lastRow = _myrows; _myrows = 0; if (arraylet) { long origChunkIdx = ValueArray.getChunkIndex(key); firstRow = (origChunkIdx == 0) ? 
0 : _nrows[(int) origChunkIdx - 1]; lastRow = _nrows[(int) origChunkIdx]; } int rowsToParse = lastRow - firstRow; // create the output streams _outputStreams2 = createRecords(firstRow, rowsToParse); assert (_outputStreams2.length > 0); _ab = _outputStreams2[0].initialize(); // perform the second parse pass CsvParser p2 = new CsvParser(aryKey, _ncolumns, _sep, _decSep, this, skipFirstLine); p2.parse(key); // store the last stream if not stored during the parse if (_ab != null) _outputStreams2[_outputIdx].store(); break; default: assert (false); } ParseStatus.update(_resultKey, DKV.get(key).length(), _phase); } catch (Exception e) { e.printStackTrace(); _error = e.getMessage(); } }
/**
 * Creates a {@code ParseStatus} whose token number is set to the given
 * position; every other field keeps the value the constructor gave it.
 *
 * @param pos token position to record in the status
 * @return a new status with {@code tokenNumber} equal to {@code pos}
 */
public static ParseStatus nextPosition(int pos) {
    final ParseStatus positioned = new ParseStatus();
    positioned.tokenNumber = pos;
    return positioned;
}