Пример #1
0
 /**
  * Returns the RubyEncoding stored in {@code encodingIndex} for the given encoding, loading the
  * encoding by name first when its slot is missing or still empty.
  *
  * @param enc the encoding to look up
  * @return the {@code encodingIndex} entry at the encoding's index
  */
 public RubyEncoding getEncoding(Encoding enc) {
   final int slot = enc.getIndex();
   if (slot < encodingIndex.length) {
     final RubyEncoding cached = encodingIndex[slot];
     if (cached != null) {
       return cached;
     }
   }

   // Nothing cached yet: load by name, then read the slot of the encoding that came back.
   final Encoding loaded = loadEncoding(new ByteList(enc.getName(), false));
   return encodingIndex[loaded.getIndex()];
 }
Пример #2
0
  /**
   * Derives constant name(s) from the encoding name stored in {@code name[p..end)} and passes
   * each of them to {@code defineEncodingConstant}.
   *
   * <p>Character classification is done with ASCII rules. Nothing is defined when the name
   * starts with a digit. A name that starts with an upper-case letter and consists only of
   * alphanumerics and underscores is passed on unchanged; otherwise (or additionally, when the
   * name contains lower-case letters) a sanitized and/or fully upper-cased copy is passed on.
   *
   * @param runtime forwarded to {@code defineEncodingConstant}
   * @param encoding forwarded to {@code defineEncodingConstant}
   * @param name buffer holding the encoding name
   * @param p index of the first byte of the name
   * @param end index one past the last byte of the name
   */
  private void defineEncodingConstants(
      Ruby runtime, RubyEncoding encoding, byte[] name, int p, int end) {
    Encoding enc = ASCIIEncoding.INSTANCE;
    int s = p;

    // A name starting with a digit yields no constant at all.
    int code = name[s] & 0xff;
    if (enc.isDigit(code)) return;

    boolean hasUpper = false;
    boolean hasLower = false;
    if (enc.isUpper(code)) {
      hasUpper = true;
      // Advance over the leading run of [A-Za-z0-9_], noting any lower-case letter seen.
      while (++s < end && (enc.isAlnum(name[s] & 0xff) || name[s] == (byte) '_')) {
        if (enc.isLower(name[s] & 0xff)) hasLower = true;
      }
    }

    // The scan only reaches `end` when the name started upper-case and every following byte
    // was alphanumeric or '_'; in that case the name is used verbatim.
    boolean isValid = false;
    if (s >= end) {
      isValid = true;
      defineEncodingConstant(runtime, encoding, name, p, end);
    }

    if (!isValid || hasLower) {
      if (!hasLower || !hasUpper) {
        // Keep scanning from where the first pass stopped until both cases have been seen
        // or the name is exhausted.
        do {
          code = name[s] & 0xff;
          if (enc.isLower(code)) hasLower = true;
          if (enc.isUpper(code)) hasUpper = true;
        } while (++s < end && (!hasLower || !hasUpper));
      }

      // Work on a private copy so the caller's buffer is not modified.
      byte[] constName = new byte[end - p];
      System.arraycopy(name, p, constName, 0, end - p);
      s = 0;
      code = constName[s] & 0xff;

      if (!isValid) {
        // Sanitize: upper-case a lower-case first byte, replace non-alphanumerics with '_'.
        if (enc.isLower(code)) constName[s] = AsciiTables.ToUpperCaseTable[code];
        for (; s < constName.length; ++s) {
          if (!enc.isAlnum(constName[s] & 0xff)) constName[s] = (byte) '_';
        }
        if (hasUpper) {
          defineEncodingConstant(runtime, encoding, constName, 0, constName.length);
        }
      }
      if (hasLower) {
        // Also pass on the fully upper-cased spelling.
        for (s = 0; s < constName.length; ++s) {
          code = constName[s] & 0xff;
          if (enc.isLower(code)) constName[s] = AsciiTables.ToUpperCaseTable[code];
        }
        defineEncodingConstant(runtime, encoding, constName, 0, constName.length);
      }
    }
  }
Пример #3
0
 /**
  * Loads the encoding registered under the given name or alias and records the matching
  * {@code encodingList} entry in {@code encodingIndex}, growing that array when the encoding's
  * index does not fit.
  *
  * @param name the encoding name or alias
  * @return the loaded encoding, or {@code null} when no entry exists for the name
  */
 public Encoding loadEncoding(ByteList name) {
   final Entry found = findEncodingOrAliasEntry(name);
   if (found == null) {
     return null;
   }

   final Encoding loaded = found.getEncoding(); // load the encoding
   final int slot = loaded.getIndex();

   if (slot >= encodingIndex.length) {
     // Grow a few slots past the requested one and carry over the existing entries.
     final RubyEncoding[] grown = new RubyEncoding[slot + 4];
     System.arraycopy(encodingIndex, 0, grown, 0, encodingIndex.length);
     encodingIndex = grown;
   }

   encodingIndex[slot] = (RubyEncoding) encodingList[found.getIndex()];
   return loaded;
 }
Пример #4
0
  /**
   * Get a java.nio Charset for the given encoding.
   *
   * <p>"ASCII-8BIT" is mapped to the "ASCII" charset; every other encoding is looked up under
   * the name returned by its {@code toString()}. When no charset can be obtained for that name
   * this method does not return {@code null}: it throws the runtime's encoding compatibility
   * error.
   *
   * @param encoding the encoding
   * @return the charset
   */
  public Charset charsetForEncoding(Encoding encoding) {
    final String encodingName = encoding.toString();

    if (encodingName.equals("ASCII-8BIT")) {
      return Charset.forName("ASCII");
    }

    try {
      return Charset.forName(encodingName);
    } catch (IllegalArgumentException noCharset) {
      // Charset.forName signals both "unsupported" (UnsupportedCharsetException) and
      // "syntactically illegal name" (IllegalCharsetNameException) with subclasses of
      // IllegalArgumentException; report either one as the same Ruby-level error.
      throw runtime.newEncodingCompatibilityError(
          "no java.nio.charset.Charset found for encoding `" + encodingName + "'");
    }
  }
Пример #5
0
 /**
  * Implements {@code chr} with an explicit encoding argument: encodes this integer as a single
  * character in the encoding named by {@code arg}.
  *
  * <p>A range error is raised when the value is negative, does not fit in 32 bits, or has no
  * positive code length in the target encoding.
  *
  * @param context the current thread context
  * @param arg the target encoding, converted to a string and then resolved to an encoding
  * @return a new string holding the encoded character
  */
 @JRubyMethod(name = "chr", compat = CompatVersion.RUBY1_9)
 public RubyString chr19(ThreadContext context, IRubyObject arg) {
   Ruby runtime = context.getRuntime();
   long value = getLongValue();
   Encoding enc = arg.convertToString().toEncoding(runtime);

   // Check the full long before narrowing: a plain (int) cast drops the high 32 bits and could
   // turn an out-of-range integer into a perfectly valid code point.
   if (value < 0 || value > 0xFFFFFFFFL) {
     throw runtime.newRangeError(this.toString() + " out of char range");
   }

   int code = (int) value;
   int n = StringSupport.codeLength(runtime, enc, code);
   if (n <= 0) {
     throw runtime.newRangeError(this.toString() + " out of char range");
   }

   ByteList bytes = new ByteList(n);
   enc.codeToMbc(code, bytes.getUnsafeBytes(), 0);
   bytes.setRealSize(n);
   return RubyString.newStringNoCopy(runtime, bytes, enc, 0);
 }
Пример #6
0
  /**
   * Determines the encoding under which two objects can be combined.
   *
   * <p>Each argument contributes an encoding when it is a {@code RubyEncoding} or an
   * {@code EncodingCapable}. If either argument contributes none, the result is {@code null}.
   *
   * @param obj1 first object
   * @param obj2 second object
   * @return the compatible encoding, or {@code null} when the objects are not compatible
   */
  public static Encoding areCompatible(IRubyObject obj1, IRubyObject obj2) {
    Encoding enc1 = null;
    Encoding enc2 = null;

    if (obj1 instanceof RubyEncoding) {
      enc1 = ((RubyEncoding) obj1).getEncoding();
    } else if (obj1 instanceof EncodingCapable) {
      enc1 = ((EncodingCapable) obj1).getEncoding();
    }

    if (obj2 instanceof RubyEncoding) {
      enc2 = ((RubyEncoding) obj2).getEncoding();
    } else if (obj2 instanceof EncodingCapable) {
      enc2 = ((EncodingCapable) obj2).getEncoding();
    }

    if (enc1 != null && enc2 != null) {
      if (enc1 == enc2) return enc1;

      // An empty string adopts the other side's encoding.
      if (obj2 instanceof RubyString && ((RubyString) obj2).getByteList().getRealSize() == 0)
        return enc1;
      if (obj1 instanceof RubyString && ((RubyString) obj1).getByteList().getRealSize() == 0)
        return enc2;

      if (!enc1.isAsciiCompatible() || !enc2.isAsciiCompatible()) return null;

      // A non-string US-ASCII operand yields to the other side's encoding.
      if (!(obj2 instanceof RubyString) && enc2 instanceof USASCIIEncoding) return enc1;
      if (!(obj1 instanceof RubyString) && enc1 instanceof USASCIIEncoding) return enc2;

      if (!(obj1 instanceof RubyString)) {
        // Swap the operands (objects AND encodings) so that a string, if there is one, ends up
        // as obj1. Both objects must be exchanged; assigning obj1 twice would leave the
        // non-string in place and make the string branch below unreachable.
        IRubyObject objTmp = obj1;
        obj1 = obj2;
        obj2 = objTmp;

        Encoding encTmp = enc1;
        enc1 = enc2;
        enc2 = encTmp;
      }

      if (obj1 instanceof RubyString) {
        int cr1 = ((RubyString) obj1).scanForCodeRange();
        if (obj2 instanceof RubyString) {
          int cr2 = ((RubyString) obj2).scanForCodeRange();
          return areCompatible(enc1, cr1, enc2, cr2);
        }
        // A 7-bit string is compatible with whatever encoding the non-string side carries.
        if (cr1 == StringSupport.CR_7BIT) return enc2;
      }
    }
    return null;
  }
Пример #7
0
  /**
   * Encodes a single code in the given encoding and returns the resulting bytes.
   *
   * <p>A range error is raised when the code is negative, when the encoding reports a
   * non-positive length for it, or when asking for that length throws an EncodingException.
   *
   * @param runtime used to create the range error
   * @param enc the target encoding
   * @param value the code to encode
   * @return a byte list sized to the encoded character
   */
  private ByteList fromEncodedBytes(Ruby runtime, Encoding enc, int value) {
    int byteCount = 0;
    if (value >= 0) {
      try {
        byteCount = enc.codeToMbcLength(value);
      } catch (EncodingException invalidCode) {
        // Treated like a non-positive length: reported as out of range below.
        byteCount = 0;
      }
    }

    if (byteCount <= 0) {
      throw runtime.newRangeError(this.toString() + " out of char range");
    }

    final ByteList encoded = new ByteList(byteCount);
    enc.codeToMbc(value, encoded.getUnsafeBytes(), 0);
    encoded.setRealSize(byteCount);
    return encoded;
  }
Пример #8
0
 /**
  * Returns the conversion path as an array of two pairs: {@code [srcEncoding, UTF-16]}
  * followed by {@code [UTF-16, destEncoding]}.
  *
  * @param context the current thread context
  * @return the two-step path array
  */
 @JRubyMethod
 public IRubyObject convpath(ThreadContext context) {
   // we always pass through UTF-16
   final IRubyObject viaUtf16 =
       context.runtime.getEncodingService().getEncodingList()[UTF16.getIndex()];

   final IRubyObject intoUtf16 = RubyArray.newArray(context.runtime, srcEncoding, viaUtf16);
   final IRubyObject outOfUtf16 = RubyArray.newArray(context.runtime, viaUtf16, destEncoding);
   return RubyArray.newArray(context.runtime, intoUtf16, outOfUtf16);
 }
Пример #9
0
 /**
  * Returns the conversion path as an array of two pairs: source encoding to UTF-16, then
  * UTF-16 to destination encoding.
  *
  * @param context the current thread context
  * @return the two-step path array
  */
 @JRubyMethod
 public IRubyObject convpath(ThreadContext context) {
   final Ruby runtime = context.runtime;

   // we always pass through UTF-16
   final IRubyObject viaUtf16 =
       runtime.getEncodingService().getEncodingList()[UTF16.getIndex()];

   final IRubyObject firstHop = RubyArray.newArray(runtime, source_encoding(context), viaUtf16);
   final IRubyObject secondHop =
       RubyArray.newArray(runtime, viaUtf16, destination_encoding(context));
   return RubyArray.newArray(runtime, firstHop, secondHop);
 }
Пример #10
0
  /**
   * Computes the character offsets for this match once and caches them in {@code charOffsets}.
   *
   * <p>Does nothing when {@code charOffsetUpdated} is already set. Without a {@code regs}
   * region the work is delegated to {@code updateCharOffsetOnlyOneReg}. For encodings whose
   * maximum character length is 1 the byte offsets are copied as they are; otherwise each
   * byte offset is translated to a character position through {@code updatePairs}.
   */
  private void updateCharOffset() {
    if (charOffsetUpdated) return;

    ByteList value = str.getByteList();
    Encoding enc = value.getEncoding();

    if (regs == null) {
      updateCharOffsetOnlyOneReg(value, enc);
      return;
    }

    int numRegs = regs.numRegs;

    if (charOffsets == null || charOffsets.numRegs < numRegs) charOffsets = new Region(numRegs);

    if (enc.maxLength() == 1) {
      for (int i = 0; i < numRegs; i++) {
        charOffsets.beg[i] = regs.beg[i];
        charOffsets.end[i] = regs.end[i];
      }

      charOffsetUpdated = true;
      return;
    }

    // One pair per begin/end byte position of every group that took part in the match.
    Pair[] pairs = new Pair[numRegs * 2];
    for (int i = 0; i < pairs.length; i++) pairs[i] = new Pair();

    int numPos = 0;
    for (int i = 0; i < numRegs; i++) {
      if (regs.beg[i] < 0) continue;
      pairs[numPos++].bytePos = regs.beg[i];
      pairs[numPos++].bytePos = regs.end[i];
    }

    updatePairs(value, enc, pairs);

    // Store the translated positions. Without this step the pairs computed above are thrown
    // away and charOffsets would be flagged as updated while still holding stale values.
    // NOTE(review): relies on updatePairs leaving `pairs` sorted by bytePos (per Pair's natural
    // ordering) with charPos filled in - confirm against updatePairs.
    Pair key = new Pair();
    for (int i = 0; i < numRegs; i++) {
      if (regs.beg[i] < 0) {
        // Group did not participate in the match.
        charOffsets.beg[i] = charOffsets.end[i] = -1;
        continue;
      }
      key.bytePos = regs.beg[i];
      charOffsets.beg[i] = pairs[java.util.Arrays.binarySearch(pairs, key)].charPos;
      key.bytePos = regs.end[i];
      charOffsets.end[i] = pairs[java.util.Arrays.binarySearch(pairs, key)].charPos;
    }

    charOffsetUpdated = true;
  }
Пример #11
0
 /**
  * Implements {@code include?} / {@code member?} for ranges.
  *
  * <p>Two cases are answered directly: ranges with a numeric bound (or a bound convertible via
  * {@code to_int}), which are tested with {@code rangeLe}/{@code rangeLt}, and ranges whose
  * bounds are both one-byte strings, which are compared by byte value when the candidate and
  * both bounds are ASCII. Everything else is forwarded to the super implementation.
  *
  * @param context the current thread context
  * @param obj the candidate value
  * @return true or false for the directly handled cases, otherwise the super method's result
  */
 @JRubyMethod(
     name = {"include?", "member?"},
     frame = true,
     compat = CompatVersion.RUBY1_9)
 public IRubyObject include_p19(ThreadContext context, IRubyObject obj) {
   final Ruby runtime = context.getRuntime();

   final boolean numericBounds =
       begin instanceof RubyNumeric
           || end instanceof RubyNumeric
           || !TypeConverter.convertToTypeWithCheck(begin, runtime.getInteger(), "to_int").isNil()
           || !TypeConverter.convertToTypeWithCheck(end, runtime.getInteger(), "to_int").isNil();
   if (numericBounds) {
     if (rangeLe(context, begin, obj) == null) {
       return runtime.getFalse();
     }
     final boolean withinEnd;
     if (isExclusive) {
       withinEnd = rangeLt(context, obj, end) != null;
     } else {
       withinEnd = rangeLe(context, obj, end) != null;
     }
     return withinEnd ? runtime.getTrue() : runtime.getFalse();
   }

   final boolean oneByteStringBounds =
       begin instanceof RubyString
           && end instanceof RubyString
           && ((RubyString) begin).getByteList().realSize == 1
           && ((RubyString) end).getByteList().realSize == 1;
   if (oneByteStringBounds) {
     if (obj.isNil()) {
       return runtime.getFalse();
     }
     if (obj instanceof RubyString) {
       final ByteList candidate = ((RubyString) obj).getByteList();
       if (candidate.realSize != 1) {
         return runtime.getFalse();
       }
       final int v = candidate.bytes[candidate.begin] & 0xff;
       final ByteList low = ((RubyString) begin).getByteList();
       final int b = low.bytes[low.begin] & 0xff;
       final ByteList high = ((RubyString) end).getByteList();
       final int e = high.bytes[high.begin] & 0xff;

       // Only plain ASCII bytes are compared here; anything else falls through to super.
       if (Encoding.isAscii(v) && Encoding.isAscii(b) && Encoding.isAscii(e)) {
         final boolean inside = (b <= v && v < e) || (!isExclusive && v == e);
         return inside ? runtime.getTrue() : runtime.getFalse();
       }
     }
   }

   return RuntimeHelpers.invokeSuper(context, this, obj, Block.NULL_BLOCK);
 }
Пример #12
0
  /**
   * Fills the {@code charOffsets} region of a match-data object for every group of its
   * {@code region} and sets its {@code charOffsetUpdated} flag.
   *
   * <p>Returns immediately when the flag is already set. When the encoding's maximum length is
   * 1 the byte offsets are copied unchanged; otherwise they are translated through
   * {@code updatePairs} and looked up by binary search. Groups with a negative begin offset
   * get -1 for both begin and end.
   *
   * @param matchData the match-data object whose fields are read and updated
   * @param value the bytes the offsets refer to
   * @param encoding the encoding of {@code value}
   */
  public static void updateCharOffsetManyRegs(
      DynamicObject matchData, ByteList value, Encoding encoding) {
    assert RubyGuards.isRubyMatchData(matchData);
    if (Layouts.MATCH_DATA.getFields(matchData).charOffsetUpdated) {
      return;
    }

    final Region regs = Layouts.MATCH_DATA.getFields(matchData).region;
    final int groupCount = regs.numRegs;

    // Reuse the existing region when it is large enough, otherwise install a fresh one.
    Region offsets = Layouts.MATCH_DATA.getFields(matchData).charOffsets;
    if (offsets == null || offsets.numRegs < groupCount) {
      offsets = new Region(groupCount);
      Layouts.MATCH_DATA.getFields(matchData).charOffsets = offsets;
    }

    if (encoding.maxLength() == 1) {
      for (int g = 0; g < groupCount; g++) {
        offsets.beg[g] = regs.beg[g];
        offsets.end[g] = regs.end[g];
      }
      Layouts.MATCH_DATA.getFields(matchData).charOffsetUpdated = true;
      return;
    }

    final Pair[] pairs = new Pair[groupCount * 2];
    for (int k = 0; k < pairs.length; k++) {
      pairs[k] = new Pair();
    }

    // Record begin/end byte positions of every group with a non-negative begin.
    int used = 0;
    for (int g = 0; g < groupCount; g++) {
      if (regs.beg[g] >= 0) {
        pairs[used++].bytePos = regs.beg[g];
        pairs[used++].bytePos = regs.end[g];
      }
    }

    updatePairs(value, encoding, pairs);

    final Pair probe = new Pair();
    for (int g = 0; g < regs.numRegs; g++) {
      if (regs.beg[g] < 0) {
        offsets.end[g] = -1;
        offsets.beg[g] = -1;
      } else {
        probe.bytePos = regs.beg[g];
        offsets.beg[g] = pairs[Arrays.binarySearch(pairs, probe)].charPos;
        probe.bytePos = regs.end[g];
        offsets.end[g] = pairs[Arrays.binarySearch(pairs, probe)].charPos;
      }
    }

    Layouts.MATCH_DATA.getFields(matchData).charOffsetUpdated = true;
  }
Пример #13
0
  /**
   * Looks up the ASCII-compatible counterpart of an encoding.
   *
   * <p>Returns nil when the argument cannot be resolved to an encoding or when the resolved
   * encoding is already ASCII compatible. Otherwise the counterpart is taken from
   * {@code NONASCII_TO_ASCII}; a converter-not-found error is raised when that map has no
   * entry.
   *
   * @param context the current thread context
   * @param self the receiver (unused)
   * @param strOrEnc the object to resolve to an encoding
   * @return nil or the Ruby encoding object of the counterpart
   */
  @JRubyMethod(compat = RUBY1_9, meta = true)
  public static IRubyObject asciicompat_encoding(
      ThreadContext context, IRubyObject self, IRubyObject strOrEnc) {
    final Ruby runtime = context.runtime;
    final EncodingService service = runtime.getEncodingService();
    final Encoding requested = service.getEncodingFromObjectNoError(strOrEnc);

    if (requested == null || requested.isAsciiCompatible()) {
      return context.nil;
    }

    final Encoding counterpart = NONASCII_TO_ASCII.get(requested);
    if (counterpart != null) {
      return service.convertEncodingToRubyEncoding(counterpart);
    }

    throw runtime.newConverterNotFoundError("no ASCII compatible encoding found for " + strOrEnc);
  }
Пример #14
0
  /**
   * Fills slot 0 of the {@code charOffsets} region of a match-data object from its
   * {@code begin}/{@code end} byte offsets.
   *
   * <p>Returns immediately when {@code charOffsetUpdated} is already set. When the encoding's
   * maximum length is 1 the byte offsets are stored unchanged. When {@code begin} is negative
   * both character offsets are set to -1. Otherwise the byte offsets are translated through
   * {@code updatePairs} and looked up by binary search.
   *
   * @param matchData the match-data object whose fields are read and updated
   * @param value the bytes the offsets refer to
   * @param encoding the encoding of {@code value}
   */
  public static void updateCharOffsetOnlyOneReg(
      DynamicObject matchData, ByteList value, Encoding encoding) {
    assert RubyGuards.isRubyMatchData(matchData);
    if (Layouts.MATCH_DATA.getFields(matchData).charOffsetUpdated) return;

    if (Layouts.MATCH_DATA.getFields(matchData).charOffsets == null
        || Layouts.MATCH_DATA.getFields(matchData).charOffsets.numRegs < 1)
      Layouts.MATCH_DATA.getFields(matchData).charOffsets = new Region(1);

    if (encoding.maxLength() == 1) {
      Layouts.MATCH_DATA.getFields(matchData).charOffsets.beg[0] =
          Layouts.MATCH_DATA.getFields(matchData).begin;
      Layouts.MATCH_DATA.getFields(matchData).charOffsets.end[0] =
          Layouts.MATCH_DATA.getFields(matchData).end;
      Layouts.MATCH_DATA.getFields(matchData).charOffsetUpdated = true;
      return;
    }

    // Handle "no match" before building the pairs, so updatePairs is never handed an array
    // whose slots are still null.
    if (Layouts.MATCH_DATA.getFields(matchData).begin < 0) {
      Layouts.MATCH_DATA.getFields(matchData).charOffsets.beg[0] =
          Layouts.MATCH_DATA.getFields(matchData).charOffsets.end[0] = -1;
      return;
    }

    Pair[] pairs = new Pair[2];
    pairs[0] = new Pair();
    pairs[0].bytePos = Layouts.MATCH_DATA.getFields(matchData).begin;
    pairs[1] = new Pair();
    pairs[1].bytePos = Layouts.MATCH_DATA.getFields(matchData).end;

    updatePairs(value, encoding, pairs);

    Pair key = new Pair();
    key.bytePos = Layouts.MATCH_DATA.getFields(matchData).begin;
    Layouts.MATCH_DATA.getFields(matchData).charOffsets.beg[0] =
        pairs[Arrays.binarySearch(pairs, key)].charPos;
    key.bytePos = Layouts.MATCH_DATA.getFields(matchData).end;
    Layouts.MATCH_DATA.getFields(matchData).charOffsets.end[0] =
        pairs[Arrays.binarySearch(pairs, key)].charPos;

    Layouts.MATCH_DATA.getFields(matchData).charOffsetUpdated = true;
  }
Пример #15
0
  /**
   * Fills slot 0 of {@code charOffsets} from the {@code begin}/{@code end} byte offsets and
   * sets {@code charOffsetUpdated}.
   *
   * <p>When the encoding's maximum length is 1 the byte offsets are stored unchanged. When
   * {@code begin} is negative both character offsets are set to -1. Otherwise the byte
   * offsets are translated to character positions through {@code updatePairs}.
   *
   * @param value the bytes the offsets refer to
   * @param encoding the encoding of {@code value}
   */
  private void updateCharOffsetOnlyOneReg(ByteList value, Encoding encoding) {
    if (charOffsets == null || charOffsets.numRegs < 1) charOffsets = new Region(1);

    if (encoding.maxLength() == 1) {
      charOffsets.beg[0] = begin;
      charOffsets.end[0] = end;
      charOffsetUpdated = true;
      return;
    }

    // No match: there is no position to translate.
    if (begin < 0) {
      charOffsets.beg[0] = charOffsets.end[0] = -1;
      charOffsetUpdated = true;
      return;
    }

    Pair[] pairs = new Pair[2];
    pairs[0] = new Pair();
    pairs[0].bytePos = begin;
    pairs[1] = new Pair();
    pairs[1].bytePos = end;

    updatePairs(value, encoding, pairs);

    // Store the translated positions. Without this step the pairs computed above are thrown
    // away and charOffsets would be flagged as updated without ever being filled.
    // NOTE(review): relies on updatePairs leaving `pairs` sorted by bytePos (per Pair's natural
    // ordering) with charPos filled in - confirm against updatePairs.
    Pair key = new Pair();
    key.bytePos = begin;
    charOffsets.beg[0] = pairs[java.util.Arrays.binarySearch(pairs, key)].charPos;
    key.bytePos = end;
    charOffsets.end[0] = pairs[java.util.Arrays.binarySearch(pairs, key)].charPos;

    charOffsetUpdated = true;
  }
Пример #16
0
  /**
   * Fills {@code charOffsets} for every group in {@code regs}.
   *
   * <p>When the encoding's maximum length is 1 the byte offsets are copied unchanged.
   * Otherwise they are translated through {@code updatePairs} and looked up by binary search;
   * groups with a negative begin offset get -1 for both begin and end. This method does not
   * touch {@code charOffsetUpdated}.
   *
   * @param value the bytes the offsets refer to
   * @param encoding the encoding of {@code value}
   */
  private void updateCharOffsetManyRegs(ByteList value, Encoding encoding) {
    final int groupCount = regs.numRegs;

    if (charOffsets == null || charOffsets.numRegs < groupCount) {
      charOffsets = new Region(groupCount);
    }

    if (encoding.maxLength() == 1) {
      for (int g = 0; g < groupCount; g++) {
        charOffsets.beg[g] = regs.beg[g];
        charOffsets.end[g] = regs.end[g];
      }
      return;
    }

    final Pair[] pairs = new Pair[groupCount * 2];
    for (int k = 0; k < pairs.length; k++) {
      pairs[k] = new Pair();
    }

    // Record begin/end byte positions of every group with a non-negative begin.
    int used = 0;
    for (int g = 0; g < groupCount; g++) {
      if (regs.beg[g] >= 0) {
        pairs[used++].bytePos = regs.beg[g];
        pairs[used++].bytePos = regs.end[g];
      }
    }

    updatePairs(value, encoding, pairs);

    final Pair probe = new Pair();
    for (int g = 0; g < regs.numRegs; g++) {
      if (regs.beg[g] < 0) {
        charOffsets.end[g] = -1;
        charOffsets.beg[g] = -1;
      } else {
        probe.bytePos = regs.beg[g];
        charOffsets.beg[g] = pairs[Arrays.binarySearch(pairs, probe)].charPos;
        probe.bytePos = regs.end[g];
        charOffsets.end[g] = pairs[Arrays.binarySearch(pairs, probe)].charPos;
      }
    }
  }
Пример #17
0
 /**
  * Finds the entry registered under the name of the given encoding.
  *
  * @param e the encoding, may be {@code null}
  * @return the result of {@code findEncodingEntry} for the encoding's name, or {@code null}
  *     when {@code e} is {@code null}
  */
 private Entry findEntryFromEncoding(Encoding e) {
   return e == null ? null : findEncodingEntry(new ByteList(e.getName()));
 }
Пример #18
0
  /**
   * Decodes base64 data from {@code source}, starting at the frame's source position and
   * stopping at the frame's source length, and returns the decoded bytes as a string.
   *
   * <p>The frame's source position is set to the buffer position reached when decoding
   * stopped. The result bytes are tagged with the encoding loaded for "ASCII".
   *
   * @param frame the frame holding the source position and length
   * @param source the bytes to decode
   * @return a string created from the decoded bytes
   */
  @Specialization
  public Object read(VirtualFrame frame, byte[] source) {
    // Bit string logic copied from jruby.util.Pack - see copyright and authorship there

    final ByteBuffer encode =
        ByteBuffer.wrap(
            source, getSourcePosition(frame), getSourceLength(frame) - getSourcePosition(frame));

    int occurrences = encode.remaining();

    // Upper bound on the decoded size: every 4 input bytes yield at most 3 output bytes.
    int length = encode.remaining() * 3 / 4;
    byte[] lElem = new byte[length];
    int a = -1, b = -1, c = 0, d;
    int index = 0;
    int s = -1;

    // NOTE(review): occurrences is initialised from encode.remaining() just above, so this
    // strict branch is only entered when there is nothing left to decode - confirm whether
    // it was meant to be driven by a count from the format directive instead.
    if (occurrences == 0) {
      // Strict decoding: input must be a multiple of 4 and contain only base64 characters.
      if (encode.remaining() % 4 != 0) {
        throw new FormatException("invalid base64");
      }
      while (encode.hasRemaining() && s != '=') {
        a = b = c = -1;
        d = -2;

        // obtain a
        s = Pack.safeGet(encode);
        a = Pack.b64_xtable[s];
        if (a == -1) throw new FormatException("invalid base64");

        // obtain b
        s = Pack.safeGet(encode);
        b = Pack.b64_xtable[s];
        if (b == -1) throw new FormatException("invalid base64");

        // obtain c
        s = Pack.safeGet(encode);
        c = Pack.b64_xtable[s];
        if (s == '=') {
          // "xx==" padding: the next character must be '=' as well.
          if (Pack.safeGet(encode) != '=') throw new FormatException("invalid base64");
          break;
        }
        if (c == -1) throw new FormatException("invalid base64");

        // obtain d
        s = Pack.safeGet(encode);
        d = Pack.b64_xtable[s];
        if (s == '=') break;
        if (d == -1) throw new FormatException("invalid base64");

        // calculate based on a, b, c and d
        lElem[index++] = (byte) ((a << 2 | b >> 4) & 255);
        lElem[index++] = (byte) ((b << 4 | c >> 2) & 255);
        lElem[index++] = (byte) ((c << 6 | d) & 255);
      }

      // Anything after the padding is an error in strict mode.
      if (encode.hasRemaining()) throw new FormatException("invalid base64");

      // Emit the bytes of a final, padded group; unused low bits must be zero.
      if (a != -1 && b != -1) {
        if (c == -1 && s == '=') {
          if ((b & 15) != 0) throw new FormatException("invalid base64");
          lElem[index++] = (byte) ((a << 2 | b >> 4) & 255);
        } else if (c != -1 && s == '=') {
          if ((c & 3) != 0) throw new FormatException("invalid base64");
          lElem[index++] = (byte) ((a << 2 | b >> 4) & 255);
          lElem[index++] = (byte) ((b << 4 | c >> 2) & 255);
        }
      }
    } else {

      // Lenient decoding: characters without a table entry are skipped rather than rejected.
      while (encode.hasRemaining()) {
        a = b = c = d = -1;

        // obtain a
        s = Pack.safeGet(encode);
        while (((a = Pack.b64_xtable[s]) == -1) && encode.hasRemaining()) {
          s = Pack.safeGet(encode);
        }
        if (a == -1) break;

        // obtain b
        s = Pack.safeGet(encode);
        while (((b = Pack.b64_xtable[s]) == -1) && encode.hasRemaining()) {
          s = Pack.safeGet(encode);
        }
        if (b == -1) break;

        // obtain c
        s = Pack.safeGet(encode);
        while (((c = Pack.b64_xtable[s]) == -1) && encode.hasRemaining()) {
          if (s == '=') break;
          s = Pack.safeGet(encode);
        }
        if ((s == '=') || c == -1) {
          if (s == '=') {
            // Step back one byte so the '=' is not counted as consumed.
            encode.position(encode.position() - 1);
          }
          break;
        }

        // obtain d
        s = Pack.safeGet(encode);
        while (((d = Pack.b64_xtable[s]) == -1) && encode.hasRemaining()) {
          if (s == '=') break;
          s = Pack.safeGet(encode);
        }
        if ((s == '=') || d == -1) {
          if (s == '=') {
            // Step back one byte so the '=' is not counted as consumed.
            encode.position(encode.position() - 1);
          }
          break;
        }

        // calculate based on a, b, c and d
        lElem[index++] = (byte) ((a << 2 | b >> 4) & 255);
        lElem[index++] = (byte) ((b << 4 | c >> 2) & 255);
        lElem[index++] = (byte) ((c << 6 | d) & 255);
      }

      // Emit the bytes of a final group that was cut short by '='.
      if (a != -1 && b != -1) {
        if (c == -1 && s == '=') {
          lElem[index++] = (byte) ((a << 2 | b >> 4) & 255);
        } else if (c != -1 && s == '=') {
          lElem[index++] = (byte) ((a << 2 | b >> 4) & 255);
          lElem[index++] = (byte) ((b << 4 | c >> 2) & 255);
        }
      }
    }

    // Only the first `index` bytes of lElem hold decoded data.
    final Encoding encoding = Encoding.load("ASCII");
    final ByteList result = new ByteList(lElem, 0, index, encoding, false);
    setSourcePosition(frame, encode.position());

    return Layouts.STRING.createString(
        getContext().getCoreLibrary().getStringFactory(),
        StringOperations.ropeFromByteList(result, StringSupport.CR_UNKNOWN),
        null);
  }
Пример #19
0
 /**
  * Creates a non-dummy Ruby encoding object wrapping the given encoding; its name is taken
  * from the encoding itself.
  *
  * @param runtime the runtime, which also supplies the class passed to the super constructor
  * @param encoding the encoding to wrap
  */
 private RubyEncoding(Ruby runtime, Encoding encoding) {
   super(runtime, runtime.getEncoding());
   this.encoding = encoding;
   this.isDummy = false;
   this.name = new ByteList(encoding.getName());
 }