コード例 #1
0
ファイル: UTF8String.java プロジェクト: spark6017/spark
  /**
   * Returns this string left-padded with {@code pad} to a length of {@code len} characters.
   * When this string already has {@code len} characters or more, or when {@code pad} is empty,
   * nothing is padded and {@code substring(0, len)} is returned instead. For example:
   * ('hi', 5, '??') => '???hi' and ('hi', 1, '??') => 'h'.
   *
   * @param len the target length, in characters
   * @param pad the string repeated on the left; only a prefix of it is used for the last
   *     repetition when the gap is not a multiple of its character count
   * @return the padded (or truncated) string
   * @throws ArithmeticException if the padded result would need more than
   *     {@code Integer.MAX_VALUE} bytes
   */
  public UTF8String lpad(int len, UTF8String pad) {
    int chars = this.numChars();
    // Compare directly rather than testing (len - chars) <= 0: the subtraction wraps around to a
    // positive value when len is close to Integer.MIN_VALUE.
    if (len <= chars || pad.numBytes() == 0) {
      // no padding at all, return the substring of the current string
      return substring(0, len);
    }

    int spaces = len - chars; // number of characters to pad; cannot overflow since len > chars
    int padChars = pad.numChars();
    int count = spaces / padChars; // how many whole copies of the padding string are needed
    // the partial copy of the padding that fills whatever the whole copies leave over
    UTF8String remain = pad.substring(0, spaces - padChars * count);

    // Size the buffer in 64-bit arithmetic: as an int sum this can wrap around and yield a buffer
    // smaller than the bytes copied into it below.
    long totalBytes = (long) this.numBytes + (long) pad.numBytes * count + remain.numBytes;
    if (totalBytes > Integer.MAX_VALUE) {
      throw new ArithmeticException(
          "lpad result of " + totalBytes + " bytes exceeds the maximum array size");
    }
    byte[] data = new byte[(int) totalBytes];

    int offset = 0;
    for (int idx = 0; idx < count; idx++) {
      copyMemory(pad.base, pad.offset, data, BYTE_ARRAY_OFFSET + offset, pad.numBytes);
      offset += pad.numBytes;
    }
    copyMemory(remain.base, remain.offset, data, BYTE_ARRAY_OFFSET + offset, remain.numBytes);
    offset += remain.numBytes;
    // the original content goes last, after all of the padding
    copyMemory(this.base, this.offset, data, BYTE_ARRAY_OFFSET + offset, this.numBytes);

    return UTF8String.fromBytes(data);
  }
コード例 #2
0
ファイル: UTF8String.java プロジェクト: spark6017/spark
 /**
  * Returns a copy of this string without its leading bytes in the range [0x00, 0x20], i.e. the
  * ASCII space and everything below it. Bytes with the high bit set are never trimmed.
  *
  * @return the left-trimmed copy, or an empty string when every byte was trimmed
  */
 public UTF8String trimLeft() {
   // find the first byte that lies outside [0x00, 0x20]
   int first = 0;
   for (; first < this.numBytes; first++) {
     byte b = getByte(first);
     if (b < 0x00 || b > 0x20) {
       break;
     }
   }

   if (first < this.numBytes) {
     return copyUTF8String(first, this.numBytes - 1);
   }
   // nothing left after trimming (or the string was empty to begin with)
   return UTF8String.fromBytes(new byte[0]);
 }
コード例 #3
0
ファイル: UTF8String.java プロジェクト: spark6017/spark
  /**
   * Returns a copy of this string without its trailing bytes in the range [0x00, 0x20], i.e. the
   * ASCII space and everything below it. Bytes with the high bit set are never trimmed.
   *
   * @return the right-trimmed copy, or an empty string when every byte was trimmed
   */
  public UTF8String trimRight() {
    // walk backwards to the last byte that lies outside [0x00, 0x20]
    int last = numBytes - 1;
    for (; last >= 0; last--) {
      byte b = getByte(last);
      if (b < 0x00 || b > 0x20) {
        break;
      }
    }

    if (last >= 0) {
      return copyUTF8String(0, last);
    }
    // nothing left after trimming (or the string was empty to begin with)
    return UTF8String.fromBytes(new byte[0]);
  }
コード例 #4
0
ファイル: UTF8String.java プロジェクト: spark6017/spark
  /**
   * Returns a new string holding the characters of this string in reverse order. Each character
   * is moved as a unit, so the bytes inside a multi-byte character keep their order.
   *
   * @return the reversed string
   */
  public UTF8String reverse() {
    byte[] result = new byte[this.numBytes];

    int i = 0; // byte position of the current character in this string
    while (i < numBytes) {
      // Clamp to the bytes that are actually left: on truncated input the lead byte can announce
      // more bytes than remain, which would make the destination offset below negative and read
      // past the end of this string.
      // NOTE(review): assumes numBytesForFirstByte never returns less than 1 - confirm, since a
      // zero length would keep this loop from advancing.
      int len = Math.min(numBytesForFirstByte(getByte(i)), numBytes - i);
      // a character starting at byte i ends up len + i bytes before the end of the result
      copyMemory(
          this.base, this.offset + i, result, BYTE_ARRAY_OFFSET + result.length - i - len, len);

      i += len;
    }

    return UTF8String.fromBytes(result);
  }
コード例 #5
0
ファイル: UTF8String.java プロジェクト: spark6017/spark
 /**
  * Returns a copy of this string without its leading and trailing bytes in the range
  * [0x00, 0x20], i.e. the ASCII space and everything below it. Bytes with the high bit set are
  * never trimmed.
  *
  * @return the trimmed copy, or an empty string when every byte was trimmed
  */
 public UTF8String trim() {
   // first byte from the left that lies outside [0x00, 0x20]
   int first = 0;
   for (; first < this.numBytes; first++) {
     byte b = getByte(first);
     if (b < 0x00 || b > 0x20) {
       break;
     }
   }

   // last byte from the right that lies outside [0x00, 0x20]
   int last = this.numBytes - 1;
   for (; last >= 0; last--) {
     byte b = getByte(last);
     if (b < 0x00 || b > 0x20) {
       break;
     }
   }

   if (first <= last) {
     return copyUTF8String(first, last);
   }
   // the two scans crossed: nothing is left after trimming
   return UTF8String.fromBytes(new byte[0]);
 }
コード例 #6
0
ファイル: UTF8String.java プロジェクト: spark6017/spark
  /**
   * Returns this string concatenated with itself {@code times} times.
   *
   * @param times the number of repetitions; zero or a negative value yields the empty string
   * @return the repeated string, or {@code EMPTY_UTF8} when there is nothing to repeat
   * @throws ArithmeticException if the result would need more than {@code Integer.MAX_VALUE}
   *     bytes
   */
  public UTF8String repeat(int times) {
    // an empty source repeats to an empty result, so skip the allocation and copy loop entirely
    if (times <= 0 || numBytes == 0) {
      return EMPTY_UTF8;
    }

    // Size the buffer in 64-bit arithmetic: as an int product this can wrap around (for example
    // 65536 * 65536 becomes 0) and yield a buffer smaller than the bytes copied into it below.
    long totalBytes = (long) numBytes * times;
    if (totalBytes > Integer.MAX_VALUE) {
      throw new ArithmeticException(
          "repeat result of " + totalBytes + " bytes exceeds the maximum array size");
    }

    byte[] newBytes = new byte[(int) totalBytes];
    copyMemory(this.base, this.offset, newBytes, BYTE_ARRAY_OFFSET, numBytes);

    // Grow the filled prefix by copying it onto itself: each pass appends as many repetitions as
    // are already present, capped by what is still missing.
    int copied = 1;
    while (copied < times) {
      int toCopy = Math.min(copied, times - copied);
      System.arraycopy(newBytes, 0, newBytes, copied * numBytes, numBytes * toCopy);
      copied += toCopy;
    }

    return UTF8String.fromBytes(newBytes);
  }
コード例 #7
0
ファイル: UTF8String.java プロジェクト: spark6017/spark
  /**
   * Computes the Soundex encoding of this string. Soundex relates similar sounding names and can
   * also serve as a general purpose scheme for finding words with similar phonemes.
   * https://en.wikipedia.org/wiki/Soundex
   *
   * @return {@code EMPTY_UTF8} for an empty string, this string itself when its first byte is not
   *     an ASCII letter, otherwise a four byte code made of the upper-cased first letter followed
   *     by up to three mapped codes and padded with '0'
   */
  public UTF8String soundex() {
    if (numBytes == 0) {
      return EMPTY_UTF8;
    }

    byte first = getByte(0);
    if (first >= 'a' && first <= 'z') {
      first -= 32; // fold to upper case
    } else if (!(first >= 'A' && first <= 'Z')) {
      // the encoding has to start with a letter; anything else is returned untouched
      return this;
    }

    byte[] encoded = {first, '0', '0', '0'};
    int filled = 1; // number of slots of encoded already in use
    byte previous = US_ENGLISH_MAPPING[first - 'A'];

    // stop scanning as soon as all four slots are taken
    for (int pos = 1; pos < numBytes && filled <= 3; pos++) {
      byte current = getByte(pos);
      boolean lower = current >= 'a' && current <= 'z';
      boolean upper = current >= 'A' && current <= 'Z';
      if (!lower && !upper) {
        // a non-letter is skipped, but it resets the remembered code
        previous = '0';
        continue;
      }
      if (lower) {
        current -= 32; // fold to upper case
      }

      byte code = US_ENGLISH_MAPPING[current - 'A'];
      if (code == '7') {
        // letters mapped to '7' are ignored and leave the remembered code as it is
        continue;
      }
      // only emit a code that is neither '0' nor a repeat of the previous one
      if (code != '0' && code != previous) {
        encoded[filled++] = code;
      }
      previous = code;
    }

    return UTF8String.fromBytes(encoded);
  }
コード例 #8
0
ファイル: UTF8String.java プロジェクト: spark6017/spark
 /**
  * Creates a new UTF8String from a copy of a byte range of this string.
  *
  * @param start index of the first byte to copy, relative to the start of this string.
  * @param end index of the last byte to copy (inclusive), relative to the start of this string.
  * @return a new UTF8String holding the bytes at positions [start, end] of this string.
  */
 private UTF8String copyUTF8String(int start, int end) {
   // both bounds are inclusive, hence the + 1
   byte[] copied = new byte[end - start + 1];
   copyMemory(base, offset + start, copied, BYTE_ARRAY_OFFSET, copied.length);
   return UTF8String.fromBytes(copied);
 }