/**
 * Returns the {@code RubyEncoding} registered for the given encoding.
 *
 * <p>The slot at {@code enc.getIndex()} in {@code encodingIndex} is used when it is already
 * populated; otherwise the encoding is loaded by name through {@code loadEncoding} and the slot
 * of the loaded encoding is returned.
 *
 * @param enc the encoding to look up
 * @return the entry stored in {@code encodingIndex} for the encoding
 */
public RubyEncoding getEncoding(Encoding enc) {
    final int slot = enc.getIndex();
    if (slot < encodingIndex.length) {
        RubyEncoding cached = encodingIndex[slot];
        if (cached != null) {
            return cached;
        }
    }

    // Not populated yet: load by name (the name bytes are wrapped without copying).
    Encoding loaded = loadEncoding(new ByteList(enc.getName(), false));
    return encodingIndex[loaded.getIndex()];
}
/**
 * Defines constants for an encoding based on the name bytes in {@code name[p, end)}.
 *
 * <p>Nothing is defined when the first byte is an ASCII digit. Otherwise up to three calls to
 * {@code defineEncodingConstant} are made:
 *
 * <ul>
 *   <li>the name as given, when it starts with an upper-case letter and consists only of ASCII
 *       alphanumerics and underscores;
 *   <li>a copy with the first letter upper-cased and every non-alphanumeric byte replaced by
 *       {@code '_'}, when the name as given was not valid and it contains an upper-case letter;
 *   <li>a fully upper-cased copy, when the name contains a lower-case letter.
 * </ul>
 *
 * @param runtime passed through to {@code defineEncodingConstant}
 * @param encoding passed through to {@code defineEncodingConstant}
 * @param name buffer holding the encoding name
 * @param p index of the first name byte
 * @param end index one past the last name byte
 */
private void defineEncodingConstants(
    Ruby runtime, RubyEncoding encoding, byte[] name, int p, int end) {
    Encoding enc = ASCIIEncoding.INSTANCE;
    int s = p;
    int code = name[s] & 0xff;
    // A name starting with a digit yields no constant at all.
    if (enc.isDigit(code)) return;
    boolean hasUpper = false;
    boolean hasLower = false;
    if (enc.isUpper(code)) {
        hasUpper = true;
        // Scan while the bytes are alphanumeric or '_', noting any lower-case letter.
        while (++s < end && (enc.isAlnum(name[s] & 0xff) || name[s] == (byte) '_')) {
            if (enc.isLower(name[s] & 0xff)) hasLower = true;
        }
    }
    boolean isValid = false;
    if (s >= end) {
        // The scan consumed the whole name: define it unchanged.
        isValid = true;
        defineEncodingConstant(runtime, encoding, name, p, end);
    }
    if (!isValid || hasLower) {
        if (!hasLower || !hasUpper) {
            // Continue from where the scan stopped, only to learn whether the name contains
            // lower- and upper-case letters; stops as soon as both have been seen.
            do {
                code = name[s] & 0xff;
                if (enc.isLower(code)) hasLower = true;
                if (enc.isUpper(code)) hasUpper = true;
            } while (++s < end && (!hasLower || !hasUpper));
        }
        // Work on a private copy so the caller's buffer is not modified.
        byte[] constName = new byte[end - p];
        System.arraycopy(name, p, constName, 0, end - p);
        s = 0;
        code = constName[s] & 0xff;
        if (!isValid) {
            // Upper-case the first letter and replace non-alphanumerics with '_'.
            if (enc.isLower(code)) constName[s] = AsciiTables.ToUpperCaseTable[code];
            for (; s < constName.length; ++s) {
                if (!enc.isAlnum(constName[s] & 0xff)) constName[s] = (byte) '_';
            }
            if (hasUpper) {
                defineEncodingConstant(runtime, encoding, constName, 0, constName.length);
            }
        }
        if (hasLower) {
            // Also define the fully upper-cased variant.
            for (s = 0; s < constName.length; ++s) {
                code = constName[s] & 0xff;
                if (enc.isLower(code)) constName[s] = AsciiTables.ToUpperCaseTable[code];
            }
            defineEncodingConstant(runtime, encoding, constName, 0, constName.length);
        }
    }
}
public Encoding loadEncoding(ByteList name) { Entry entry = findEncodingOrAliasEntry(name); if (entry == null) return null; Encoding enc = entry.getEncoding(); // load the encoding int index = enc.getIndex(); if (index >= encodingIndex.length) { RubyEncoding tmp[] = new RubyEncoding[index + 4]; System.arraycopy(encodingIndex, 0, tmp, 0, encodingIndex.length); encodingIndex = tmp; } encodingIndex[index] = (RubyEncoding) encodingList[entry.getIndex()]; return enc; }
/**
 * Get a java.nio Charset for the given encoding.
 *
 * <p>{@code ASCII-8BIT} is mapped to the {@code ASCII} charset; every other encoding is looked
 * up by the name returned from {@code encoding.toString()}.
 *
 * <p>This method never returns {@code null}: when the JVM has no charset with that name, the
 * error created by {@code runtime.newEncodingCompatibilityError} is thrown instead.
 *
 * @param encoding the encoding
 * @return the charset
 */
public Charset charsetForEncoding(Encoding encoding) {
    final String encodingName = encoding.toString();

    if (encodingName.equals("ASCII-8BIT")) {
        return Charset.forName("ASCII");
    }

    try {
        return Charset.forName(encodingName);
    } catch (UnsupportedCharsetException uce) {
        throw runtime.newEncodingCompatibilityError(
            "no java.nio.charset.Charset found for encoding `" + encodingName + "'");
    }
}
/**
 * Returns a one-character string for this integer's value in the encoding named by
 * {@code arg}.
 *
 * <p>The value must fit in an unsigned 32-bit code point. Without the explicit upper bound the
 * narrowing cast to {@code int} would silently wrap larger values onto unrelated code points
 * instead of reporting them as out of range.
 *
 * @param context the current thread context
 * @param arg the target encoding, converted with {@code convertToString().toEncoding(runtime)}
 * @return a new string holding the encoded character
 */
@JRubyMethod(name = "chr", compat = CompatVersion.RUBY1_9)
public RubyString chr19(ThreadContext context, IRubyObject arg) {
    Ruby runtime = context.getRuntime();
    long value = getLongValue();
    // Resolve the encoding first so an invalid argument is still reported before range errors.
    Encoding enc = arg.convertToString().toEncoding(runtime);

    if (value < 0 || value > 0xFFFFFFFFL) {
        throw runtime.newRangeError(this.toString() + " out of char range");
    }

    int code = (int) value;
    int n = StringSupport.codeLength(runtime, enc, code);
    if (n <= 0) {
        throw runtime.newRangeError(this.toString() + " out of char range");
    }

    ByteList bytes = new ByteList(n);
    enc.codeToMbc(code, bytes.getUnsafeBytes(), 0);
    bytes.setRealSize(n);
    return RubyString.newStringNoCopy(runtime, bytes, enc, 0);
}
/**
 * Determines the encoding two objects can share.
 *
 * <p>An encoding is taken from each argument when it is a {@code RubyEncoding} or an
 * {@code EncodingCapable}. The rules are then applied in order:
 *
 * <ol>
 *   <li>identical encodings are compatible;
 *   <li>an empty string yields the other object's encoding;
 *   <li>if either encoding is not ASCII compatible, there is no common encoding;
 *   <li>a non-string object with a US-ASCII encoding yields the other object's encoding;
 *   <li>otherwise the decision is based on the code range of the string operand(s).
 * </ol>
 *
 * @param obj1 first object
 * @param obj2 second object
 * @return the compatible encoding, or {@code null} when there is none or when either object
 *     carries no encoding
 */
public static Encoding areCompatible(IRubyObject obj1, IRubyObject obj2) {
    Encoding enc1 = null;
    Encoding enc2 = null;

    if (obj1 instanceof RubyEncoding) {
        enc1 = ((RubyEncoding) obj1).getEncoding();
    } else if (obj1 instanceof EncodingCapable) {
        enc1 = ((EncodingCapable) obj1).getEncoding();
    }

    if (obj2 instanceof RubyEncoding) {
        enc2 = ((RubyEncoding) obj2).getEncoding();
    } else if (obj2 instanceof EncodingCapable) {
        enc2 = ((EncodingCapable) obj2).getEncoding();
    }

    if (enc1 != null && enc2 != null) {
        if (enc1 == enc2) return enc1;

        if (obj2 instanceof RubyString && ((RubyString) obj2).getByteList().getRealSize() == 0) {
            return enc1;
        }
        if (obj1 instanceof RubyString && ((RubyString) obj1).getByteList().getRealSize() == 0) {
            return enc2;
        }

        if (!enc1.isAsciiCompatible() || !enc2.isAsciiCompatible()) return null;

        if (!(obj2 instanceof RubyString) && enc2 instanceof USASCIIEncoding) return enc1;
        if (!(obj1 instanceof RubyString) && enc1 instanceof USASCIIEncoding) return enc2;

        if (!(obj1 instanceof RubyString)) {
            // Put the string operand (if any) first. Both the objects and their encodings
            // must be swapped together so that enc1 keeps describing obj1.
            IRubyObject objTmp = obj1;
            obj1 = obj2;
            obj2 = objTmp;

            Encoding encTmp = enc1;
            enc1 = enc2;
            enc2 = encTmp;
        }

        if (obj1 instanceof RubyString) {
            int cr1 = ((RubyString) obj1).scanForCodeRange();
            if (obj2 instanceof RubyString) {
                int cr2 = ((RubyString) obj2).scanForCodeRange();
                return areCompatible(enc1, cr1, enc2, cr2);
            }
            // A 7-bit string adopts the encoding of the non-string operand.
            if (cr1 == StringSupport.CR_7BIT) return enc2;
        }
    }
    return null;
}
/**
 * Encodes a single code point into a freshly allocated {@code ByteList}.
 *
 * <p>A negative value, a non-positive encoded length, or an {@code EncodingException} while
 * computing the length all result in the runtime's range error.
 *
 * @param runtime used to create the range error
 * @param enc the encoding that performs the conversion
 * @param value the code point to encode
 * @return the encoded bytes, with the real size set to the encoded length
 */
private ByteList fromEncodedBytes(Ruby runtime, Encoding enc, int value) {
    int encodedLength = 0;
    if (value >= 0) {
        try {
            encodedLength = enc.codeToMbcLength(value);
        } catch (EncodingException ee) {
            // Treated like any other unencodable value: the length stays 0.
            encodedLength = 0;
        }
    }

    if (encodedLength <= 0) {
        throw runtime.newRangeError(this.toString() + " out of char range");
    }

    ByteList encoded = new ByteList(encodedLength);
    enc.codeToMbc(value, encoded.getUnsafeBytes(), 0);
    encoded.setRealSize(encodedLength);
    return encoded;
}
@JRubyMethod public IRubyObject convpath(ThreadContext context) { // we always pass through UTF-16 IRubyObject utf16Encoding = context.runtime.getEncodingService().getEncodingList()[UTF16.getIndex()]; return RubyArray.newArray( context.runtime, RubyArray.newArray(context.runtime, srcEncoding, utf16Encoding), RubyArray.newArray(context.runtime, utf16Encoding, destEncoding)); }
@JRubyMethod public IRubyObject convpath(ThreadContext context) { Ruby runtime = context.runtime; EncodingService encodingService = runtime.getEncodingService(); // we always pass through UTF-16 IRubyObject utf16Encoding = encodingService.getEncodingList()[UTF16.getIndex()]; return RubyArray.newArray( runtime, RubyArray.newArray(runtime, source_encoding(context), utf16Encoding), RubyArray.newArray(runtime, utf16Encoding, destination_encoding(context))); }
/**
 * Computes the character offsets of every match register once and caches them in
 * {@code charOffsets}; later calls return immediately while {@code charOffsetUpdated} is set.
 *
 * <p>Without a register set ({@code regs == null}) the work is delegated to
 * {@code updateCharOffsetOnlyOneReg}. For single-byte encodings the byte offsets are copied
 * unchanged. For multi-byte encodings the byte positions are converted through
 * {@code updatePairs} and the resulting character positions are stored per register;
 * registers that did not participate in the match get {@code -1} for both offsets.
 */
private void updateCharOffset() {
    if (charOffsetUpdated) return;

    ByteList value = str.getByteList();
    Encoding enc = value.getEncoding();

    if (regs == null) {
        updateCharOffsetOnlyOneReg(value, enc);
        return;
    }

    int numRegs = regs.numRegs;
    if (charOffsets == null || charOffsets.numRegs < numRegs) charOffsets = new Region(numRegs);

    if (enc.maxLength() == 1) {
        // One byte per character: byte offsets are character offsets.
        for (int i = 0; i < numRegs; i++) {
            charOffsets.beg[i] = regs.beg[i];
            charOffsets.end[i] = regs.end[i];
        }
        charOffsetUpdated = true;
        return;
    }

    Pair[] pairs = new Pair[numRegs * 2];
    for (int i = 0; i < pairs.length; i++) pairs[i] = new Pair();

    int numPos = 0;
    for (int i = 0; i < numRegs; i++) {
        if (regs.beg[i] < 0) continue;
        pairs[numPos++].bytePos = regs.beg[i];
        pairs[numPos++].bytePos = regs.end[i];
    }

    updatePairs(value, enc, pairs);

    // Store the converted positions; previously they were computed and then discarded.
    // NOTE(review): assumes updatePairs leaves the array ordered by bytePos with charPos
    // filled in, which the binary search below requires - confirm against updatePairs.
    Pair key = new Pair();
    for (int i = 0; i < numRegs; i++) {
        if (regs.beg[i] < 0) {
            charOffsets.beg[i] = charOffsets.end[i] = -1;
            continue;
        }
        key.bytePos = regs.beg[i];
        charOffsets.beg[i] = pairs[Arrays.binarySearch(pairs, key)].charPos;
        key.bytePos = regs.end[i];
        charOffsets.end[i] = pairs[Arrays.binarySearch(pairs, key)].charPos;
    }

    charOffsetUpdated = true;
}
/**
 * Implements {@code include?} / {@code member?} for ranges.
 *
 * <p>Ranges whose endpoints are numeric (or convertible through {@code to_int}) are answered
 * by comparing {@code obj} against both endpoints. Ranges of two one-byte strings are answered
 * by comparing byte values when the argument is a one-byte string and all three bytes are
 * ASCII. Every other case is delegated to the superclass implementation.
 *
 * @param context the current thread context
 * @param obj the candidate element
 * @return Ruby true/false, or whatever the superclass implementation returns
 */
@JRubyMethod(
    name = {"include?", "member?"},
    frame = true,
    compat = CompatVersion.RUBY1_9)
public IRubyObject include_p19(ThreadContext context, IRubyObject obj) {
    final Ruby runtime = context.getRuntime();

    final boolean numericEndpoints =
        begin instanceof RubyNumeric
            || end instanceof RubyNumeric
            || !TypeConverter.convertToTypeWithCheck(begin, runtime.getInteger(), "to_int").isNil()
            || !TypeConverter.convertToTypeWithCheck(end, runtime.getInteger(), "to_int").isNil();

    if (numericEndpoints) {
        if (rangeLe(context, begin, obj) == null) {
            return runtime.getFalse();
        }
        final boolean withinUpperBound =
            isExclusive
                ? rangeLt(context, obj, end) != null
                : rangeLe(context, obj, end) != null;
        if (withinUpperBound) {
            return runtime.getTrue();
        }
        return runtime.getFalse();
    }

    final boolean singleByteStringEndpoints =
        begin instanceof RubyString
            && end instanceof RubyString
            && ((RubyString) begin).getByteList().realSize == 1
            && ((RubyString) end).getByteList().realSize == 1;

    if (singleByteStringEndpoints) {
        if (obj.isNil()) {
            return runtime.getFalse();
        }
        if (obj instanceof RubyString) {
            ByteList candidate = ((RubyString) obj).getByteList();
            if (candidate.realSize != 1) {
                return runtime.getFalse();
            }
            int ch = candidate.bytes[candidate.begin] & 0xff;

            ByteList lowBytes = ((RubyString) begin).getByteList();
            int low = lowBytes.bytes[lowBytes.begin] & 0xff;

            ByteList highBytes = ((RubyString) end).getByteList();
            int high = highBytes.bytes[highBytes.begin] & 0xff;

            if (Encoding.isAscii(ch) && Encoding.isAscii(low) && Encoding.isAscii(high)) {
                boolean inside = (low <= ch && ch < high) || (!isExclusive && ch == high);
                if (inside) {
                    return runtime.getTrue();
                }
                return runtime.getFalse();
            }
        }
    }

    // Anything not decided above is left to the superclass implementation.
    return RuntimeHelpers.invokeSuper(context, this, obj, Block.NULL_BLOCK);
}
/**
 * Fills the cached character offsets of a match-data object from its byte-offset region.
 *
 * <p>Does nothing when the offsets are already marked as updated. For single-byte encodings
 * the byte offsets are copied as they are; otherwise the byte positions are run through
 * {@code updatePairs} and looked up again by binary search. Registers with a negative start
 * get {@code -1} for both offsets.
 *
 * @param matchData the match-data object (asserted via {@code RubyGuards.isRubyMatchData})
 * @param value bytes of the matched string, passed to {@code updatePairs}
 * @param encoding encoding of {@code value}
 */
public static void updateCharOffsetManyRegs(
    DynamicObject matchData, ByteList value, Encoding encoding) {
    assert RubyGuards.isRubyMatchData(matchData);
    if (Layouts.MATCH_DATA.getFields(matchData).charOffsetUpdated) {
        return;
    }

    final Region regs = Layouts.MATCH_DATA.getFields(matchData).region;
    final int numRegs = regs.numRegs;

    Region offsets = Layouts.MATCH_DATA.getFields(matchData).charOffsets;
    if (offsets == null || offsets.numRegs < numRegs) {
        offsets = new Region(numRegs);
        Layouts.MATCH_DATA.getFields(matchData).charOffsets = offsets;
    }

    if (encoding.maxLength() == 1) {
        for (int reg = 0; reg < numRegs; reg++) {
            offsets.beg[reg] = regs.beg[reg];
            offsets.end[reg] = regs.end[reg];
        }
        Layouts.MATCH_DATA.getFields(matchData).charOffsetUpdated = true;
        return;
    }

    final Pair[] pairs = new Pair[numRegs * 2];
    for (int k = 0; k < pairs.length; k++) {
        pairs[k] = new Pair();
    }

    // Collect start/end byte positions of every register that took part in the match.
    int filled = 0;
    for (int reg = 0; reg < numRegs; reg++) {
        if (regs.beg[reg] >= 0) {
            pairs[filled].bytePos = regs.beg[reg];
            pairs[filled + 1].bytePos = regs.end[reg];
            filled += 2;
        }
    }

    updatePairs(value, encoding, pairs);

    final Pair probe = new Pair();
    for (int reg = 0; reg < numRegs; reg++) {
        if (regs.beg[reg] >= 0) {
            probe.bytePos = regs.beg[reg];
            offsets.beg[reg] = pairs[Arrays.binarySearch(pairs, probe)].charPos;
            probe.bytePos = regs.end[reg];
            offsets.end[reg] = pairs[Arrays.binarySearch(pairs, probe)].charPos;
        } else {
            offsets.end[reg] = -1;
            offsets.beg[reg] = -1;
        }
    }

    Layouts.MATCH_DATA.getFields(matchData).charOffsetUpdated = true;
}
/**
 * Returns the ASCII-compatible counterpart registered in {@code NONASCII_TO_ASCII} for the
 * given string or encoding.
 *
 * <p>Yields nil when the argument cannot be resolved to an encoding or when its encoding is
 * already ASCII compatible. Raises a converter-not-found error when the encoding is not ASCII
 * compatible and has no registered counterpart.
 *
 * @param context the current thread context
 * @param self the receiver (unused)
 * @param strOrEnc a string or encoding object
 * @return the Ruby encoding object of the counterpart, or nil
 */
@JRubyMethod(compat = RUBY1_9, meta = true)
public static IRubyObject asciicompat_encoding(
    ThreadContext context, IRubyObject self, IRubyObject strOrEnc) {
    final Ruby runtime = context.runtime;
    final EncodingService service = runtime.getEncodingService();

    final Encoding resolved = service.getEncodingFromObjectNoError(strOrEnc);
    if (resolved == null || resolved.isAsciiCompatible()) {
        return context.nil;
    }

    final Encoding counterpart = NONASCII_TO_ASCII.get(resolved);
    if (counterpart != null) {
        return service.convertEncodingToRubyEncoding(counterpart);
    }

    throw runtime.newConverterNotFoundError("no ASCII compatible encoding found for " + strOrEnc);
}
/**
 * Fills the cached character offsets of a match-data object that has a single begin/end pair.
 *
 * <p>Does nothing when the offsets are already marked as updated. For single-byte encodings
 * the byte offsets are copied as they are. A negative {@code begin} is recorded as
 * {@code -1}/{@code -1}. Otherwise both byte positions are converted through
 * {@code updatePairs} and looked up again by binary search.
 *
 * @param matchData the match-data object (asserted via {@code RubyGuards.isRubyMatchData})
 * @param value bytes of the matched string, passed to {@code updatePairs}
 * @param encoding encoding of {@code value}
 */
public static void updateCharOffsetOnlyOneReg(
    DynamicObject matchData, ByteList value, Encoding encoding) {
    assert RubyGuards.isRubyMatchData(matchData);
    if (Layouts.MATCH_DATA.getFields(matchData).charOffsetUpdated) return;

    Region charOffsets = Layouts.MATCH_DATA.getFields(matchData).charOffsets;
    if (charOffsets == null || charOffsets.numRegs < 1) {
        charOffsets = new Region(1);
        Layouts.MATCH_DATA.getFields(matchData).charOffsets = charOffsets;
    }

    final int begin = Layouts.MATCH_DATA.getFields(matchData).begin;
    final int end = Layouts.MATCH_DATA.getFields(matchData).end;

    if (encoding.maxLength() == 1) {
        charOffsets.beg[0] = begin;
        charOffsets.end[0] = end;
        Layouts.MATCH_DATA.getFields(matchData).charOffsetUpdated = true;
        return;
    }

    if (begin < 0) {
        // Handle this before building the pairs: the array used to be handed to updatePairs
        // with null elements in this case.
        // NOTE(review): presumably updatePairs dereferences every element - confirm.
        charOffsets.beg[0] = charOffsets.end[0] = -1;
        Layouts.MATCH_DATA.getFields(matchData).charOffsetUpdated = true;
        return;
    }

    Pair[] pairs = new Pair[2];
    pairs[0] = new Pair();
    pairs[0].bytePos = begin;
    pairs[1] = new Pair();
    pairs[1].bytePos = end;

    updatePairs(value, encoding, pairs);

    Pair key = new Pair();
    key.bytePos = begin;
    charOffsets.beg[0] = pairs[Arrays.binarySearch(pairs, key)].charPos;
    key.bytePos = end;
    charOffsets.end[0] = pairs[Arrays.binarySearch(pairs, key)].charPos;

    Layouts.MATCH_DATA.getFields(matchData).charOffsetUpdated = true;
}
/**
 * Computes the character offsets for a match that has only the {@code begin}/{@code end} pair
 * and stores them in slot 0 of {@code charOffsets}, then marks the offsets as updated.
 *
 * <p>For single-byte encodings the byte offsets are copied unchanged. For multi-byte
 * encodings the byte positions are converted through {@code updatePairs} and the resulting
 * character positions are written back; previously they were computed and then dropped. A
 * negative {@code begin} is recorded as {@code -1}/{@code -1} in the multi-byte case.
 *
 * @param value bytes of the matched string, passed to {@code updatePairs}
 * @param encoding encoding of {@code value}
 */
private void updateCharOffsetOnlyOneReg(ByteList value, Encoding encoding) {
    if (charOffsets == null || charOffsets.numRegs < 1) charOffsets = new Region(1);

    if (encoding.maxLength() == 1) {
        charOffsets.beg[0] = begin;
        charOffsets.end[0] = end;
        charOffsetUpdated = true;
        return;
    }

    if (begin < 0) {
        // No byte position to convert.
        charOffsets.beg[0] = charOffsets.end[0] = -1;
        charOffsetUpdated = true;
        return;
    }

    // Keep direct references so the results can be read back whatever order updatePairs
    // leaves the array in.
    // NOTE(review): assumes updatePairs stores charPos on the Pair objects it is given -
    // confirm against updatePairs.
    Pair beginPair = new Pair();
    beginPair.bytePos = begin;
    Pair endPair = new Pair();
    endPair.bytePos = end;

    Pair[] pairs = new Pair[2];
    pairs[0] = beginPair;
    pairs[1] = endPair;

    updatePairs(value, encoding, pairs);

    charOffsets.beg[0] = beginPair.charPos;
    charOffsets.end[0] = endPair.charPos;
    charOffsetUpdated = true;
}
/**
 * Fills {@code charOffsets} with the character offsets of every register in {@code regs}.
 *
 * <p>For single-byte encodings the byte offsets are copied as they are. Otherwise the byte
 * positions of all participating registers are run through {@code updatePairs} and looked up
 * again by binary search. Registers with a negative start get {@code -1} for both offsets.
 * This method does not touch {@code charOffsetUpdated}.
 *
 * @param value bytes of the matched string, passed to {@code updatePairs}
 * @param encoding encoding of {@code value}
 */
private void updateCharOffsetManyRegs(ByteList value, Encoding encoding) {
    final int groupCount = regs.numRegs;
    if (charOffsets == null || charOffsets.numRegs < groupCount) {
        charOffsets = new Region(groupCount);
    }

    if (encoding.maxLength() == 1) {
        for (int g = 0; g < groupCount; g++) {
            charOffsets.beg[g] = regs.beg[g];
            charOffsets.end[g] = regs.end[g];
        }
        return;
    }

    final Pair[] pairs = new Pair[groupCount * 2];
    for (int k = 0; k < pairs.length; k++) {
        pairs[k] = new Pair();
    }

    // Collect start/end byte positions of every register that took part in the match.
    int filled = 0;
    for (int g = 0; g < groupCount; g++) {
        if (regs.beg[g] >= 0) {
            pairs[filled].bytePos = regs.beg[g];
            pairs[filled + 1].bytePos = regs.end[g];
            filled += 2;
        }
    }

    updatePairs(value, encoding, pairs);

    final Pair probe = new Pair();
    for (int g = 0; g < groupCount; g++) {
        if (regs.beg[g] >= 0) {
            probe.bytePos = regs.beg[g];
            charOffsets.beg[g] = pairs[Arrays.binarySearch(pairs, probe)].charPos;
            probe.bytePos = regs.end[g];
            charOffsets.end[g] = pairs[Arrays.binarySearch(pairs, probe)].charPos;
        } else {
            charOffsets.end[g] = -1;
            charOffsets.beg[g] = -1;
        }
    }
}
/**
 * Looks up the registry entry for an encoding by its name.
 *
 * @param e the encoding, may be {@code null}
 * @return the result of {@code findEncodingEntry} for the encoding's name, or {@code null}
 *     when {@code e} is {@code null}
 */
private Entry findEntryFromEncoding(Encoding e) {
    return e == null ? null : findEncodingEntry(new ByteList(e.getName()));
}
/**
 * Decodes base64 data from {@code source}, starting at the frame's current source position,
 * and returns the decoded bytes as a new string.
 *
 * <p>The source position stored in the frame is advanced to where decoding stopped.
 *
 * @param frame frame holding the source position and length
 * @param source the bytes to decode
 * @return a string created from the decoded bytes
 */
@Specialization
public Object read(VirtualFrame frame, byte[] source) {
    // Bit string logic copied from jruby.util.Pack - see copyright and authorship there
    final ByteBuffer encode =
        ByteBuffer.wrap(
            source, getSourcePosition(frame), getSourceLength(frame) - getSourcePosition(frame));
    int occurrences = encode.remaining();
    // Every 4 input characters produce at most 3 output bytes.
    int length = encode.remaining() * 3 / 4;
    byte[] lElem = new byte[length];
    int a = -1, b = -1, c = 0, d;
    int index = 0;
    int s = -1;
    // NOTE(review): occurrences is the number of remaining bytes, so this strict branch is
    // taken only when there is nothing left to decode - confirm that this is intended.
    if (occurrences == 0) {
        // Strict decoding: any malformed input raises FormatException.
        if (encode.remaining() % 4 != 0) {
            throw new FormatException("invalid base64");
        }
        while (encode.hasRemaining() && s != '=') {
            a = b = c = -1;
            d = -2;
            // obtain a
            s = Pack.safeGet(encode);
            a = Pack.b64_xtable[s];
            if (a == -1) throw new FormatException("invalid base64");
            // obtain b
            s = Pack.safeGet(encode);
            b = Pack.b64_xtable[s];
            if (b == -1) throw new FormatException("invalid base64");
            // obtain c
            s = Pack.safeGet(encode);
            c = Pack.b64_xtable[s];
            if (s == '=') {
                // '=' in the third position must be followed by a second '='.
                if (Pack.safeGet(encode) != '=') throw new FormatException("invalid base64");
                break;
            }
            if (c == -1) throw new FormatException("invalid base64");
            // obtain d
            s = Pack.safeGet(encode);
            d = Pack.b64_xtable[s];
            if (s == '=') break;
            if (d == -1) throw new FormatException("invalid base64");
            // calculate based on a, b, c and d
            lElem[index++] = (byte) ((a << 2 | b >> 4) & 255);
            lElem[index++] = (byte) ((b << 4 | c >> 2) & 255);
            lElem[index++] = (byte) ((c << 6 | d) & 255);
        }
        if (encode.hasRemaining()) throw new FormatException("invalid base64");
        // Emit the bytes of a final, padded group; leftover low bits must be zero.
        if (a != -1 && b != -1) {
            if (c == -1 && s == '=') {
                if ((b & 15) != 0) throw new FormatException("invalid base64");
                lElem[index++] = (byte) ((a << 2 | b >> 4) & 255);
            } else if (c != -1 && s == '=') {
                if ((c & 3) != 0) throw new FormatException("invalid base64");
                lElem[index++] = (byte) ((a << 2 | b >> 4) & 255);
                lElem[index++] = (byte) ((b << 4 | c >> 2) & 255);
            }
        }
    } else {
        // Lenient decoding: bytes that map to -1 in the table are skipped, not rejected.
        while (encode.hasRemaining()) {
            a = b = c = d = -1;
            // obtain a
            s = Pack.safeGet(encode);
            while (((a = Pack.b64_xtable[s]) == -1) && encode.hasRemaining()) {
                s = Pack.safeGet(encode);
            }
            if (a == -1) break;
            // obtain b
            s = Pack.safeGet(encode);
            while (((b = Pack.b64_xtable[s]) == -1) && encode.hasRemaining()) {
                s = Pack.safeGet(encode);
            }
            if (b == -1) break;
            // obtain c
            s = Pack.safeGet(encode);
            while (((c = Pack.b64_xtable[s]) == -1) && encode.hasRemaining()) {
                if (s == '=') break;
                s = Pack.safeGet(encode);
            }
            if ((s == '=') || c == -1) {
                if (s == '=') {
                    // Step back one byte so the '=' is not counted as consumed.
                    encode.position(encode.position() - 1);
                }
                break;
            }
            // obtain d
            s = Pack.safeGet(encode);
            while (((d = Pack.b64_xtable[s]) == -1) && encode.hasRemaining()) {
                if (s == '=') break;
                s = Pack.safeGet(encode);
            }
            if ((s == '=') || d == -1) {
                if (s == '=') {
                    // Step back one byte so the '=' is not counted as consumed.
                    encode.position(encode.position() - 1);
                }
                break;
            }
            // calculate based on a, b, c and d
            lElem[index++] = (byte) ((a << 2 | b >> 4) & 255);
            lElem[index++] = (byte) ((b << 4 | c >> 2) & 255);
            lElem[index++] = (byte) ((c << 6 | d) & 255);
        }
        // Emit the bytes of a final group that was cut short by '='.
        if (a != -1 && b != -1) {
            if (c == -1 && s == '=') {
                lElem[index++] = (byte) ((a << 2 | b >> 4) & 255);
            } else if (c != -1 && s == '=') {
                lElem[index++] = (byte) ((a << 2 | b >> 4) & 255);
                lElem[index++] = (byte) ((b << 4 | c >> 2) & 255);
            }
        }
    }
    final Encoding encoding = Encoding.load("ASCII");
    // Only the first `index` bytes of lElem hold decoded data.
    final ByteList result = new ByteList(lElem, 0, index, encoding, false);
    setSourcePosition(frame, encode.position());
    return Layouts.STRING.createString(
        getContext().getCoreLibrary().getStringFactory(),
        StringOperations.ropeFromByteList(result, StringSupport.CR_UNKNOWN),
        null);
}
/**
 * Creates a non-dummy Ruby encoding object wrapping the given encoding.
 *
 * @param runtime the runtime; {@code runtime.getEncoding()} is passed to the superclass
 *     constructor
 * @param encoding the wrapped encoding; its name is stored as a new {@code ByteList}
 */
private RubyEncoding(Ruby runtime, Encoding encoding) {
    super(runtime, runtime.getEncoding());
    this.encoding = encoding;
    this.isDummy = false;
    this.name = new ByteList(encoding.getName());
}