/** * Returns str, left-padded with pad to a length of len. For example: ('hi', 5, '??') => * '???hi' ('hi', 1, '??') => 'h' */ public UTF8String lpad(int len, UTF8String pad) { int spaces = len - this.numChars(); // number of char need to pad if (spaces <= 0 || pad.numBytes() == 0) { // no padding at all, return the substring of the current string return substring(0, len); } else { int padChars = pad.numChars(); int count = spaces / padChars; // how many padding string needed // the partial string of the padding UTF8String remain = pad.substring(0, spaces - padChars * count); byte[] data = new byte[this.numBytes + pad.numBytes * count + remain.numBytes]; int offset = 0; int idx = 0; while (idx < count) { copyMemory(pad.base, pad.offset, data, BYTE_ARRAY_OFFSET + offset, pad.numBytes); ++idx; offset += pad.numBytes; } copyMemory(remain.base, remain.offset, data, BYTE_ARRAY_OFFSET + offset, remain.numBytes); offset += remain.numBytes; copyMemory(this.base, this.offset, data, BYTE_ARRAY_OFFSET + offset, numBytes()); return UTF8String.fromBytes(data); } }
public UTF8String trimLeft() { int s = 0; // skip all of the space (0x20) in the left side while (s < this.numBytes && getByte(s) <= 0x20 && getByte(s) >= 0x00) s++; if (s == this.numBytes) { // empty string return UTF8String.fromBytes(new byte[0]); } else { return copyUTF8String(s, this.numBytes - 1); } }
public UTF8String trimRight() { int e = numBytes - 1; // skip all of the space (0x20) in the right side while (e >= 0 && getByte(e) <= 0x20 && getByte(e) >= 0x00) e--; if (e < 0) { // empty string return UTF8String.fromBytes(new byte[0]); } else { return copyUTF8String(0, e); } }
public UTF8String reverse() { byte[] result = new byte[this.numBytes]; int i = 0; // position in byte while (i < numBytes) { int len = numBytesForFirstByte(getByte(i)); copyMemory( this.base, this.offset + i, result, BYTE_ARRAY_OFFSET + result.length - i - len, len); i += len; } return UTF8String.fromBytes(result); }
public UTF8String trim() { int s = 0; int e = this.numBytes - 1; // skip all of the space (0x20) in the left side while (s < this.numBytes && getByte(s) <= 0x20 && getByte(s) >= 0x00) s++; // skip all of the space (0x20) in the right side while (e >= 0 && getByte(e) <= 0x20 && getByte(e) >= 0x00) e--; if (s > e) { // empty string return UTF8String.fromBytes(new byte[0]); } else { return copyUTF8String(s, e); } }
/**
 * Returns this string concatenated with itself {@code times} times.
 *
 * @param times how many copies the result should contain; values of zero or less yield
 *              {@code EMPTY_UTF8}
 * @return the repeated string
 */
public UTF8String repeat(int times) {
  if (times <= 0) {
    return EMPTY_UTF8;
  }

  byte[] buffer = new byte[numBytes * times];
  // Seed the buffer with a single copy of this string.
  copyMemory(this.base, this.offset, buffer, BYTE_ARRAY_OFFSET, numBytes);

  // Grow by copying what is already in the buffer onto its own tail, which doubles the
  // number of finished copies each round until fewer than that many remain.
  for (int done = 1; done < times; ) {
    int batch = Math.min(done, times - done);
    System.arraycopy(buffer, 0, buffer, done * numBytes, numBytes * batch);
    done += batch;
  }

  return UTF8String.fromBytes(buffer);
}
/** * Encodes a string into a Soundex value. Soundex is an encoding used to relate similar names, but * can also be used as a general purpose scheme to find word with similar phonemes. * https://en.wikipedia.org/wiki/Soundex */ public UTF8String soundex() { if (numBytes == 0) { return EMPTY_UTF8; } byte b = getByte(0); if ('a' <= b && b <= 'z') { b -= 32; } else if (b < 'A' || 'Z' < b) { // first character must be a letter return this; } byte[] sx = {'0', '0', '0', '0'}; sx[0] = b; int sxi = 1; int idx = b - 'A'; byte lastCode = US_ENGLISH_MAPPING[idx]; for (int i = 1; i < numBytes; i++) { b = getByte(i); if ('a' <= b && b <= 'z') { b -= 32; } else if (b < 'A' || 'Z' < b) { // not a letter, skip it lastCode = '0'; continue; } idx = b - 'A'; byte code = US_ENGLISH_MAPPING[idx]; if (code == '7') { // ignore it } else { if (code != '0' && code != lastCode) { sx[sxi++] = code; if (sxi > 3) break; } lastCode = code; } } return UTF8String.fromBytes(sx); }
/**
 * Creates a new UTF8String from a byte range of this one. Both bounds are inclusive.
 *
 * @param start byte position of the first byte to copy.
 * @param end byte position of the last byte to copy.
 * @return a new UTF8String backed by a fresh array holding bytes [start, end] of this
 *         string.
 */
private UTF8String copyUTF8String(int start, int end) {
  final int length = end - start + 1;
  final byte[] copy = new byte[length];
  copyMemory(base, offset + start, copy, BYTE_ARRAY_OFFSET, length);
  return UTF8String.fromBytes(copy);
}