예제 #1
0
  /*
   * BNF in RFC5646
   *
   * Language-Tag  = langtag             ; normal language tags
   *               / privateuse          ; private use tag
   *               / grandfathered       ; grandfathered tags
   *
   *
   * langtag       = language
   *                 ["-" script]
   *                 ["-" region]
   *                 *("-" variant)
   *                 *("-" extension)
   *                 ["-" privateuse]
   *
   * language      = 2*3ALPHA            ; shortest ISO 639 code
   *                 ["-" extlang]       ; sometimes followed by
   *                                     ; extended language subtags
   *               / 4ALPHA              ; or reserved for future use
   *               / 5*8ALPHA            ; or registered language subtag
   *
   * extlang       = 3ALPHA              ; selected ISO 639 codes
   *                 *2("-" 3ALPHA)      ; permanently reserved
   *
   * script        = 4ALPHA              ; ISO 15924 code
   *
   * region        = 2ALPHA              ; ISO 3166-1 code
   *               / 3DIGIT              ; UN M.49 code
   *
   * variant       = 5*8alphanum         ; registered variants
   *               / (DIGIT 3alphanum)
   *
   * extension     = singleton 1*("-" (2*8alphanum))
   *
   *                                     ; Single alphanumerics
   *                                     ; "x" reserved for private use
   * singleton     = DIGIT               ; 0 - 9
   *               / %x41-57             ; A - W
   *               / %x59-5A             ; Y - Z
   *               / %x61-77             ; a - w
   *               / %x79-7A             ; y - z
   *
   * privateuse    = "x" 1*("-" (1*8alphanum))
   *
   */
  /**
   * Parses a language tag string into a {@code LanguageTag}, following the grammar quoted above.
   *
   * <p>The input is first looked up (lower-cased) in {@code GRANDFATHERED}; on a hit, the second
   * element of the mapped array (the preferred mapping) is tokenized instead of the input itself.
   *
   * <p>A tag is always returned. If subtags remain unconsumed after all components were tried and
   * no error has been recorded yet, the position and text of the first leftover subtag are stored
   * in {@code sts}.
   *
   * @param languageTag the tag text to parse
   * @param sts receives error details; it is reset on entry. May be {@code null}, in which case a
   *     throwaway status object is used and error details are not visible to the caller
   * @return the tag populated by the component parsers
   */
  public static LanguageTag parse(String languageTag, ParseStatus sts) {
    if (sts != null) {
      sts.reset();
    } else {
      sts = new ParseStatus();
    }

    // A grandfathered tag is parsed through its preferred mapping rather than its own text
    String[] preferred = GRANDFATHERED.get(LocaleUtils.toLowerString(languageTag));
    String text = (preferred == null) ? languageTag : preferred[1];
    StringTokenIterator tokens = new StringTokenIterator(text, SEP);

    LanguageTag result = new LanguageTag();

    // langtag must start with either language or privateuse
    boolean startsWithLanguage = result.parseLanguage(tokens, sts);
    if (startsWithLanguage) {
      result.parseExtlangs(tokens, sts);
      result.parseScript(tokens, sts);
      result.parseRegion(tokens, sts);
      result.parseVariants(tokens, sts);
      result.parseExtensions(tokens, sts);
    }
    result.parsePrivateuse(tokens, sts);

    // Nothing left over, or an error is already recorded: no further reporting needed
    if (tokens.isDone() || sts.isError()) {
      return result;
    }

    // Report the first subtag that none of the component parsers accepted
    String leftover = tokens.current();
    sts.errorIndex = tokens.currentStart();
    sts.errorMsg = leftover.isEmpty() ? "Empty subtag" : "Invalid subtag: " + leftover;

    return result;
  }
예제 #2
0
  /**
   * Builds a {@code LanguageTag} from the fields of a base locale and its optional extensions.
   *
   * <ul>
   *   <li>Language, script and region are copied only when they pass {@code isLanguage}, {@code
   *       isScript} and {@code isRegion}; the deprecated language codes {@code iw}, {@code ji} and
   *       {@code in} become {@code he}, {@code yi} and {@code id}.
   *   <li>{@code no} + {@code NO} + variant {@code NY} is rewritten to language {@code nn} with no
   *       variant.
   *   <li>The variant is split on {@code BaseLocale.SEP}. Leading pieces accepted by {@code
   *       isVariant} become variants, unmodified. Following pieces accepted by {@code
   *       isPrivateuseSubtag} are appended to the private use part behind {@code
   *       PRIVUSE_VARIANT_PREFIX}; everything from the first rejected piece on is dropped.
   *   <li>Each extension whose key satisfies {@code isPrivateusePrefixChar} supplies the private
   *       use value; every other extension is stored as key, {@code SEP}, value.
   *   <li>If no language was set, {@code UNDETERMINED} is used unless the tag consists of private
   *       use only.
   * </ul>
   *
   * @param baseLocale source of language, script, region and variant
   * @param localeExtensions extensions to include; may be {@code null}
   * @return the assembled tag
   */
  public static LanguageTag parseLocale(BaseLocale baseLocale, LocaleExtensions localeExtensions) {
    LanguageTag result = new LanguageTag();

    String language = baseLocale.getLanguage();
    String script = baseLocale.getScript();
    String region = baseLocale.getRegion();
    String variant = baseLocale.getVariant();

    // Set once any subtag other than language or private use has been stored
    boolean sawOtherSubtag = false;

    if (isLanguage(language)) {
      // Replace a deprecated language code with its current code
      switch (language) {
        case "iw":
          language = "he";
          break;
        case "ji":
          language = "yi";
          break;
        case "in":
          language = "id";
          break;
        default:
          break;
      }
      result.language = language;
    }

    if (isScript(script)) {
      result.script = canonicalizeScript(script);
      sawOtherSubtag = true;
    }

    if (isRegion(region)) {
      result.region = canonicalizeRegion(region);
      sawOtherSubtag = true;
    }

    // no_NO_NY is expressed as nn_NO in a language tag
    boolean norwegianNynorsk =
        result.language.equals("no") && result.region.equals("NO") && variant.equals("NY");
    if (norwegianNynorsk) {
      result.language = "nn";
      variant = "";
    }

    // Ill-formed variant pieces that can still be carried as private use subtags
    String variantPrivateuse = null;

    if (!variant.isEmpty()) {
      StringTokenIterator pieces = new StringTokenIterator(variant, BaseLocale.SEP);

      // Leading run of well-formed variants; their case is deliberately left untouched
      List<String> wellFormed = new ArrayList<>();
      for (; !pieces.isDone(); pieces.next()) {
        String candidate = pieces.current();
        if (!isVariant(candidate)) {
          break;
        }
        wellFormed.add(candidate);
      }
      if (!wellFormed.isEmpty()) {
        result.variants = wellFormed;
        sawOtherSubtag = true;
      }

      if (!pieces.isDone()) {
        // Remaining pieces are ill-formed as variants; keep those usable as private use
        StringJoiner salvaged = new StringJoiner(SEP);
        for (; !pieces.isDone(); pieces.next()) {
          String candidate = pieces.current();
          if (!isPrivateuseSubtag(candidate)) {
            // not representable even as private use - the rest is truncated
            break;
          }
          salvaged.add(candidate);
        }
        if (salvaged.length() > 0) {
          variantPrivateuse = salvaged.toString();
        }
      }
    }

    List<String> extensionSubtags = new ArrayList<>();
    String privateuse = null;

    if (localeExtensions != null) {
      Set<Character> keys = localeExtensions.getKeys();
      for (Character key : keys) {
        Extension extension = localeExtensions.getExtension(key);
        if (isPrivateusePrefixChar(key)) {
          privateuse = extension.getValue();
        } else {
          extensionSubtags.add(key.toString() + SEP + extension.getValue());
        }
      }
    }

    if (!extensionSubtags.isEmpty()) {
      result.extensions = extensionSubtags;
      sawOtherSubtag = true;
    }

    // Attach the salvaged variant pieces to the private use part
    if (variantPrivateuse != null) {
      privateuse =
          (privateuse == null)
              ? PRIVUSE_VARIANT_PREFIX + SEP + variantPrivateuse
              : privateuse
                  + SEP
                  + PRIVUSE_VARIANT_PREFIX
                  + SEP
                  + variantPrivateuse.replace(BaseLocale.SEP, SEP);
    }

    if (privateuse != null) {
      result.privateuse = privateuse;
    }

    // Fall back to "und" when there is no language and the tag is not private-use-only:
    // either some other subtag is present, or there is no private use part at all
    boolean privateuseOnly = !sawOtherSubtag && privateuse != null;
    if (result.language.isEmpty() && !privateuseOnly) {
      result.language = UNDETERMINED;
    }

    return result;
  }