Beispiel #1
0
  /**
   * Applies the ToUnicode operation to the text behind {@code iter}.
   *
   * <p>The RFC states that ToUnicode never fails: if any step fails, the original input is
   * returned. Accordingly, a failure in the prepare step, a missing ACE prefix, a punycode decoding
   * failure, or a mismatch in the final verification all result in a fresh buffer holding
   * {@code iter.getText()}.
   *
   * <p>The iterator is advanced while scanning and is not repositioned before returning.
   *
   * @param iter iterator over the label to convert
   * @param options option bits handed on to the prepare step and to {@code convertToASCII}
   * @return the punycode-decoded label when every step succeeds, otherwise a copy of the iterator's
   *     text
   * @throws StringPrepParseException only from the {@code convertToASCII} round trip in step 6,
   *     whose exception is not caught here
   */
  public static StringBuffer convertToUnicode(UCharacterIterator iter, int options)
      throws StringPrepParseException {

    final int startIndex = iter.getIndex();

    // step 1: scan for the first code point outside the ASCII range
    boolean allAscii = true;
    for (int cp = iter.next(); cp != UCharacterIterator.DONE; cp = iter.next()) {
      if (cp > 0x7F) {
        allAscii = false;
        break;
      }
    }

    // step 2: only non-ASCII input goes through the prepare step
    StringBuffer prepared;
    if (allAscii) {
      // pure ASCII: use the source text as is
      prepared = new StringBuffer(iter.getText());
    } else {
      iter.setIndex(startIndex);
      try {
        prepared = transform.prepare(iter, options);
      } catch (StringPrepParseException e) {
        // step failed: hand back the original input
        return new StringBuffer(iter.getText());
      }
    }

    // step 3: without the ACE prefix there is nothing to decode
    if (!startsWithPrefix(prepared)) {
      return new StringBuffer(iter.getText());
    }

    // step 4: strip the ACE prefix
    String encoded = prepared.substring(ACE_PREFIX_LENGTH, prepared.length());

    // step 5: decode using punycode
    StringBuffer decoded = null;
    try {
      decoded = PunycodeReference.decode(new StringBuffer(encoded), null);
    } catch (StringPrepParseException e) {
      // step failed: hand back the original input
      return new StringBuffer(iter.getText());
    }

    // step 6: apply toASCII to the decoded label
    // NOTE(review): an exception raised here propagates to the caller instead of falling back to
    // the original input like the other steps do - confirm this is intended.
    StringBuffer roundTrip = convertToASCII(decoded, options);

    // step 7: the round trip must reproduce the prepared input, ignoring ASCII case
    if (compareCaseInsensitiveASCII(prepared, roundTrip) != 0) {
      return new StringBuffer(iter.getText());
    }

    // step 8: return output of step 5
    return decoded;
  }
Beispiel #2
0
  /**
   * Applies the ToASCII operation to the text behind {@code srcIter}.
   *
   * <p>Input containing a code point above 0x7F is first run through the prepare step. If the
   * result is entirely ASCII it is returned unchanged; otherwise it is punycode-encoded,
   * lower-cased and prefixed with the ACE prefix.
   *
   * @param srcIter iterator over the label to convert; it is rewound to the start after the
   *     initial scan
   * @param options option bits; when {@code USE_STD3_RULES} is set the processed label must
   *     consist of LDH characters only and must not begin or end with a hyphen
   * @return the ASCII form of the label
   * @throws StringPrepParseException if the prepare step fails, the processed label is empty, the
   *     STD3 rules are violated, a non-ASCII processed label already starts with the ACE prefix,
   *     or the result is longer than {@code MAX_LABEL_LENGTH}
   */
  public static StringBuffer convertToASCII(UCharacterIterator srcIter, int options)
      throws StringPrepParseException {

    // no case flags are handed to the punycode encoder
    char[] caseFlags = null;

    // get the options
    boolean useSTD3ASCIIRules = ((options & USE_STD3_RULES) != 0);

    // step 1: find out whether the source holds any non-ASCII code point
    boolean srcIsASCII = true;
    int ch;
    while ((ch = srcIter.next()) != UCharacterIterator.DONE) {
      if (ch > 0x7f) {
        srcIsASCII = false;
        // one hit settles it; the iterator is rewound right below
        break;
      }
    }
    srcIter.setToStart();

    // step 2 is performed only if the source contains non ASCII
    StringBuffer processOut =
        srcIsASCII ? new StringBuffer(srcIter.getText()) : transform.prepare(srcIter, options);

    int poLen = processOut.length();
    if (poLen == 0) {
      throw new StringPrepParseException(
          "Found zero length lable after NamePrep.", StringPrepParseException.ZERO_LENGTH_LABEL);
    }

    // step 3 & 4: classify the processed label
    boolean processedIsASCII = true;
    boolean srcIsLDH = true;
    // index of the last non-LDH ASCII character seen, or -1 if there is none
    int failPos = -1;
    for (int j = 0; j < poLen; j++) {
      ch = processOut.charAt(j);
      if (ch > 0x7F) {
        processedIsASCII = false;
      } else if (!isLDHChar(ch)) {
        // here we do not assemble surrogates
        // since we know that LDH code points
        // are in the ASCII range only
        srcIsLDH = false;
        failPos = j;
      }
    }

    if (useSTD3ASCIIRules) {
      // verify 3a: only LDH characters are allowed
      if (!srcIsLDH) {
        throw new StringPrepParseException(
            "The input does not conform to the STD 3 ASCII rules",
            StringPrepParseException.STD3_ASCII_RULES_ERROR,
            processOut.toString(),
            (failPos > 0) ? (failPos - 1) : failPos);
      }
      // verify 3b: no leading hyphen ...
      if (processOut.charAt(0) == HYPHEN) {
        throw new StringPrepParseException(
            "The input does not conform to the STD 3 ASCII rules",
            StringPrepParseException.STD3_ASCII_RULES_ERROR,
            processOut.toString(),
            0);
      }
      // ... and no trailing hyphen (poLen is known to be > 0 here)
      if (processOut.charAt(poLen - 1) == HYPHEN) {
        throw new StringPrepParseException(
            "The input does not conform to the STD 3 ASCII rules",
            StringPrepParseException.STD3_ASCII_RULES_ERROR,
            processOut.toString(),
            poLen - 1);
      }
    }

    StringBuffer dest;
    if (processedIsASCII) {
      // already ASCII: nothing to encode
      dest = processOut;
    } else {
      // step 5 : verify the sequence does not begin with ACE prefix
      if (startsWithPrefix(processOut)) {
        // the failure is that the prefix IS present, so the message must say so
        throw new StringPrepParseException(
            "The input starts with the ACE Prefix.",
            StringPrepParseException.ACE_PREFIX_ERROR,
            processOut.toString(),
            0);
      }

      // step 6: encode the sequence with punycode
      StringBuffer punyout = PunycodeReference.encode(processOut, caseFlags);

      // convert all codepoints to lower case ASCII
      StringBuffer lowerOut = toASCIILower(punyout);

      // step 7: prepend the ACE prefix, then append the encoded label
      dest = new StringBuffer();
      dest.append(ACE_PREFIX, 0, ACE_PREFIX_LENGTH);
      dest.append(lowerOut);
    }

    // step 8: enforce the maximum label length
    // NOTE(review): the message hard-codes 64 - confirm it matches MAX_LABEL_LENGTH.
    if (dest.length() > MAX_LABEL_LENGTH) {
      throw new StringPrepParseException(
          "The labels in the input are too long. Length > 64.",
          StringPrepParseException.LABEL_TOO_LONG_ERROR,
          dest.toString(),
          0);
    }
    return dest;
  }