/* * BNF in RFC5646 * * Language-Tag = langtag ; normal language tags * / privateuse ; private use tag * / grandfathered ; grandfathered tags * * * langtag = language * ["-" script] * ["-" region] * *("-" variant) * *("-" extension) * ["-" privateuse] * * language = 2*3ALPHA ; shortest ISO 639 code * ["-" extlang] ; sometimes followed by * ; extended language subtags * / 4ALPHA ; or reserved for future use * / 5*8ALPHA ; or registered language subtag * * extlang = 3ALPHA ; selected ISO 639 codes * *2("-" 3ALPHA) ; permanently reserved * * script = 4ALPHA ; ISO 15924 code * * region = 2ALPHA ; ISO 3166-1 code * / 3DIGIT ; UN M.49 code * * variant = 5*8alphanum ; registered variants * / (DIGIT 3alphanum) * * extension = singleton 1*("-" (2*8alphanum)) * * ; Single alphanumerics * ; "x" reserved for private use * singleton = DIGIT ; 0 - 9 * / %x41-57 ; A - W * / %x59-5A ; Y - Z * / %x61-77 ; a - w * / %x79-7A ; y - z * * privateuse = "x" 1*("-" (1*8alphanum)) * */ public static LanguageTag parse(String languageTag, ParseStatus sts) { if (sts == null) { sts = new ParseStatus(); } else { sts.reset(); } StringTokenIterator itr; // Check if the tag is grandfathered String[] gfmap = GRANDFATHERED.get(LocaleUtils.toLowerString(languageTag)); if (gfmap != null) { // use preferred mapping itr = new StringTokenIterator(gfmap[1], SEP); } else { itr = new StringTokenIterator(languageTag, SEP); } LanguageTag tag = new LanguageTag(); // langtag must start with either language or privateuse if (tag.parseLanguage(itr, sts)) { tag.parseExtlangs(itr, sts); tag.parseScript(itr, sts); tag.parseRegion(itr, sts); tag.parseVariants(itr, sts); tag.parseExtensions(itr, sts); } tag.parsePrivateuse(itr, sts); if (!itr.isDone() && !sts.isError()) { String s = itr.current(); sts.errorIndex = itr.currentStart(); if (s.length() == 0) { sts.errorMsg = "Empty subtag"; } else { sts.errorMsg = "Invalid subtag: " + s; } } return tag; }
public static LanguageTag parseLocale(BaseLocale baseLocale, LocaleExtensions localeExtensions) { LanguageTag tag = new LanguageTag(); String language = baseLocale.getLanguage(); String script = baseLocale.getScript(); String region = baseLocale.getRegion(); String variant = baseLocale.getVariant(); boolean hasSubtag = false; String privuseVar = null; // store ill-formed variant subtags if (isLanguage(language)) { // Convert a deprecated language code to its new code if (language.equals("iw")) { language = "he"; } else if (language.equals("ji")) { language = "yi"; } else if (language.equals("in")) { language = "id"; } tag.language = language; } if (isScript(script)) { tag.script = canonicalizeScript(script); hasSubtag = true; } if (isRegion(region)) { tag.region = canonicalizeRegion(region); hasSubtag = true; } // Special handling for no_NO_NY - use nn_NO for language tag if (tag.language.equals("no") && tag.region.equals("NO") && variant.equals("NY")) { tag.language = "nn"; variant = ""; } if (variant.length() > 0) { List<String> variants = null; StringTokenIterator varitr = new StringTokenIterator(variant, BaseLocale.SEP); while (!varitr.isDone()) { String var = varitr.current(); if (!isVariant(var)) { break; } if (variants == null) { variants = new ArrayList<>(); } variants.add(var); // Do not canonicalize! varitr.next(); } if (variants != null) { tag.variants = variants; hasSubtag = true; } if (!varitr.isDone()) { // ill-formed variant subtags StringJoiner sj = new StringJoiner(SEP); while (!varitr.isDone()) { String prvv = varitr.current(); if (!isPrivateuseSubtag(prvv)) { // cannot use private use subtag - truncated break; } sj.add(prvv); varitr.next(); } if (sj.length() > 0) { privuseVar = sj.toString(); } } } List<String> extensions = null; String privateuse = null; if (localeExtensions != null) { Set<Character> locextKeys = localeExtensions.getKeys(); for (Character locextKey : locextKeys) { Extension ext = localeExtensions.getExtension(locextKey); if (isPrivateusePrefixChar(locextKey)) { privateuse = ext.getValue(); } else { if (extensions == null) { extensions = new ArrayList<>(); } extensions.add(locextKey.toString() + SEP + ext.getValue()); } } } if (extensions != null) { tag.extensions = extensions; hasSubtag = true; } // append ill-formed variant subtags to private use if (privuseVar != null) { if (privateuse == null) { privateuse = PRIVUSE_VARIANT_PREFIX + SEP + privuseVar; } else { privateuse = privateuse + SEP + PRIVUSE_VARIANT_PREFIX + SEP + privuseVar.replace(BaseLocale.SEP, SEP); } } if (privateuse != null) { tag.privateuse = privateuse; } if (tag.language.length() == 0 && (hasSubtag || privateuse == null)) { // use lang "und" when 1) no language is available AND // 2) any of other subtags other than private use are available or // no private use tag is available tag.language = UNDETERMINED; } return tag; }