/* * BNF in RFC5646 * * Language-Tag = langtag ; normal language tags * / privateuse ; private use tag * / grandfathered ; grandfathered tags * * * langtag = language * ["-" script] * ["-" region] * *("-" variant) * *("-" extension) * ["-" privateuse] * * language = 2*3ALPHA ; shortest ISO 639 code * ["-" extlang] ; sometimes followed by * ; extended language subtags * / 4ALPHA ; or reserved for future use * / 5*8ALPHA ; or registered language subtag * * extlang = 3ALPHA ; selected ISO 639 codes * *2("-" 3ALPHA) ; permanently reserved * * script = 4ALPHA ; ISO 15924 code * * region = 2ALPHA ; ISO 3166-1 code * / 3DIGIT ; UN M.49 code * * variant = 5*8alphanum ; registered variants * / (DIGIT 3alphanum) * * extension = singleton 1*("-" (2*8alphanum)) * * ; Single alphanumerics * ; "x" reserved for private use * singleton = DIGIT ; 0 - 9 * / %x41-57 ; A - W * / %x59-5A ; Y - Z * / %x61-77 ; a - w * / %x79-7A ; y - z * * privateuse = "x" 1*("-" (1*8alphanum)) * */ public static LanguageTag parse(String languageTag, ParseStatus sts) { if (sts == null) { sts = new ParseStatus(); } else { sts.reset(); } StringTokenIterator itr; // Check if the tag is grandfathered String[] gfmap = GRANDFATHERED.get(LocaleUtils.toLowerString(languageTag)); if (gfmap != null) { // use preferred mapping itr = new StringTokenIterator(gfmap[1], SEP); } else { itr = new StringTokenIterator(languageTag, SEP); } LanguageTag tag = new LanguageTag(); // langtag must start with either language or privateuse if (tag.parseLanguage(itr, sts)) { tag.parseExtlangs(itr, sts); tag.parseScript(itr, sts); tag.parseRegion(itr, sts); tag.parseVariants(itr, sts); tag.parseExtensions(itr, sts); } tag.parsePrivateuse(itr, sts); if (!itr.isDone() && !sts.isError()) { String s = itr.current(); sts.errorIndex = itr.currentStart(); if (s.length() == 0) { sts.errorMsg = "Empty subtag"; } else { sts.errorMsg = "Invalid subtag: " + s; } } return tag; }
/**
 * Consumes zero or more extension sequences (a singleton followed by one or
 * more extension subtags) from the iterator and appends each complete
 * sequence, joined with {@code SEP}, to {@code extensions}.
 *
 * <p>Does nothing when the iterator is exhausted or {@code sts} already
 * holds an error. {@code sts.parseLength} is advanced to the end of every
 * accepted subtag. A singleton with no accepted subtag sets
 * {@code sts.errorIndex} and {@code sts.errorMsg} and stops the scan;
 * extensions stored before that point are kept, so the method can return
 * {@code true} with an error recorded.
 *
 * @param itr token iterator positioned at the candidate singleton
 * @param sts parse status to update
 * @return {@code true} if at least one complete extension was stored
 */
private boolean parseExtensions(StringTokenIterator itr, ParseStatus sts) {
    if (itr.isDone() || sts.isError()) {
        return false;
    }

    boolean parsedAny = false;

    while (!itr.isDone()) {
        final String singleton = itr.current();
        if (!isExtensionSingleton(singleton)) {
            break;
        }

        final int singletonStart = itr.currentStart();
        StringBuilder extension = new StringBuilder(singleton);

        // Collect the subtags that belong to this singleton.
        for (itr.next(); !itr.isDone(); itr.next()) {
            String subtag = itr.current();
            if (!isExtensionSubtag(subtag)) {
                break;
            }
            extension.append(SEP).append(subtag);
            sts.parseLength = itr.currentEnd();
        }

        // parseLength moves past the singleton only when a subtag was
        // accepted above (assumes earlier stages left it at or before
        // singletonStart - TODO confirm).
        if (sts.parseLength <= singletonStart) {
            sts.errorIndex = singletonStart;
            sts.errorMsg = "Incomplete extension '" + singleton + "'";
            break;
        }

        // Swap in a fresh list while the current one is empty; presumably
        // the field starts out as a shared empty list - verify against the
        // field declaration.
        if (extensions.isEmpty()) {
            extensions = new ArrayList<>(4);
        }
        extensions.add(extension.toString());
        parsedAny = true;
    }

    return parsedAny;
}
private boolean parsePrivateuse(StringTokenIterator itr, ParseStatus sts) { if (itr.isDone() || sts.isError()) { return false; } boolean found = false; String s = itr.current(); if (isPrivateusePrefix(s)) { int start = itr.currentStart(); StringBuilder sb = new StringBuilder(s); itr.next(); while (!itr.isDone()) { s = itr.current(); if (!isPrivateuseSubtag(s)) { break; } sb.append(SEP).append(s); sts.parseLength = itr.currentEnd(); itr.next(); } if (sts.parseLength <= start) { // need at least 1 private subtag sts.errorIndex = start; sts.errorMsg = "Incomplete privateuse"; } else { privateuse = sb.toString(); found = true; } } return found; }