Example #1
0
  /**
   * Scans {@code text} one window at a time. Each window is read from its last character towards
   * its first while the per-character bit masks held in {@code processed} are AND-ed into a running
   * state. At most the first 32 characters of the pattern determine the window size.
   *
   * @param text the characters to search in
   * @param textStart index at which the first window starts
   * @param textEnd index just past the last character a window may cover
   * @param pattern the pattern; only its length is read here
   * @param processed the {@link CharIntMap} holding the bit masks for {@code pattern}
   * @return the start index of the first window that is read completely without the state becoming
   *     zero, or {@code -1} if no such window exists
   * @see com.eaio.stringsearch.StringSearch#searchChars(char[], int, int, char[], Object)
   */
  @Override
  public int searchChars(
      char[] text, int textStart, int textEnd, char[] pattern, Object processed) {

    final CharIntMap masks = (CharIntMap) processed;
    final int window = Math.min(pattern.length, 32);
    final int lastWindowStart = textEnd - window;

    int start = textStart;
    while (start <= lastWindowStart) {
      // Distance to move the window if this attempt fails; shrinks while the state stays alive.
      int shift = window;
      int state = -1;
      int offset = window - 1;

      do {
        state &= masks.get(text[start + offset]);
        if (state != 0) {
          if (offset == 0) {
            // The whole window was consumed with a live state.
            return start;
          }
          shift = offset;
        }
        offset--;
        state <<= 1;
      } while (state != 0);

      start += shift;
    }

    return -1;
  }
Example #2
0
  /**
   * Searches {@code text} from {@code textStart} (inclusive) to {@code textEnd} (exclusive) by
   * shifting a bit state left for every character and OR-ing in that character's mask.
   *
   * <p>{@code processed} is either a {@link CharIntMap}, or an {@code Object[]} whose first element
   * is the {@link CharIntMap} and whose second element is an {@link Integer} giving the effective
   * pattern length.
   *
   * <p>When a plain {@link CharIntMap} is passed, the effective pattern length is capped at 31. An
   * uncapped length of 32 makes the limit {@code Integer.MIN_VALUE}, so no state can ever be below
   * it, and longer lengths wrap the shift distance.
   *
   * @param text the characters to search in
   * @param textStart index of the first character to examine
   * @param textEnd index just past the last character to examine
   * @param pattern the pattern; only its length is read here
   * @param processed the pre-processed pattern data, see above
   * @return the index at which the (possibly truncated) pattern starts, or {@code -1} if it was not
   *     found
   * @see com.eaio.stringsearch.StringSearch#searchChars(char[], int, int, char[], Object)
   */
  public int searchChars(
      char[] text, int textStart, int textEnd, char[] pattern, Object processed) {

    CharIntMap m;
    // NOTE(review): assumes a plain CharIntMap comes from processChars(char[]), which encodes at
    // most the first 31 pattern characters - confirm against the enclosing class.
    int l = Math.min(pattern.length, 31);

    if (processed instanceof CharIntMap) {
      m = (CharIntMap) processed;
    } else {
      Object[] params = (Object[]) processed;
      m = (CharIntMap) params[0];
      // The caller-supplied length is trusted as is.
      l = ((Integer) params[1]).intValue();
    }

    // All bits from position l - 1 upwards are set in lim; a state below lim has one of them clear.
    int lim = ~((1 << (l - 1)) - 1);
    int state = ~0;
    for (int i = textStart; i < textEnd; ++i) {
      state = (state << 1) | m.get(text[i]);
      if (state < lim) {
        return i - l + 1;
      }
    }

    return -1;
  }
Example #3
0
  /**
   * Pre-processes the pattern into a {@link CharIntMap} of bit masks.
   *
   * <p>The map is obtained from {@code createCharIntMap(pattern, ~0)}. Then, for every index
   * {@code i} among the first 31 pattern characters, bit {@code i} is cleared in the entry of the
   * character found at that index. Characters beyond index 30 are ignored, so longer patterns may
   * lead to unexpected results.
   *
   * @param pattern the pattern
   * @return the resulting {@link CharIntMap}
   * @see StringSearch#processChars(char[])
   */
  public Object processChars(char[] pattern) {
    final int limit = pattern.length > 31 ? 31 : pattern.length;
    final CharIntMap masks = createCharIntMap(pattern, ~0);

    int bit = 1;
    for (int index = 0; index < limit; index++) {
      final char c = pattern[index];
      masks.set(c, masks.get(c) & ~bit);
      bit <<= 1;
    }

    return masks;
  }
  /**
   * Builds the lookup table and constants consumed by the six-argument {@code searchChars}.
   *
   * <p>If {@code 2 * k} exceeds the pattern length, the data is built in {@code MATCH} mode and
   * {@code pattern.length - k} is used in place of {@code k}; otherwise {@code MISMATCH} mode is
   * used with {@code k} unchanged. With {@code b = clog2(k + 1) + 1}, pattern position {@code p}
   * is represented by the bit {@code 1 << (p * b)}.
   *
   * @param pattern the pattern
   * @param k the threshold, adjusted as described above
   * @return an {@code Object[]} holding, in this order: the {@link CharIntMap}, the mode
   *     ({@code MATCH} or {@code MISMATCH}), and as {@link Integer}s the state mask, the overflow
   *     mask, {@code b} and the comparison limit
   * @throws IllegalArgumentException if the pattern length is larger than {@code 31 / b}
   * @see com.eaio.stringsearch.MismatchSearch#processChars(char[], int)
   */
  public Object processChars(char[] pattern, int k) {

    final int patternLength = pattern.length;
    final boolean flip = (k << 1) > patternLength;
    final Object type = flip ? MATCH : MISMATCH;
    final boolean countingMatches = type == MATCH;
    final int threshold = flip ? patternLength - k : k;

    final int b = clog2(threshold + 1) + 1;
    if (patternLength > (31 / b)) {
      throw new IllegalArgumentException();
    }

    /* Preprocessing */

    // Shift that moves a value into the slot belonging to the last pattern position.
    final int topShift = (patternLength - 1) * b;

    // One set bit at the top of each b-bit slot, one slot per pattern position.
    int ovmask = 0;
    for (int n = 0; n < patternLength; n++) {
      ovmask = (ovmask << b) | (1 << (b - 1));
    }

    int lim = threshold << topShift;
    final CharIntMap table;
    if (countingMatches) {
      table = createCharIntMap(pattern);
    } else {
      lim += 1 << topShift;
      table = createCharIntMap(pattern, ovmask >> (b - 1));
    }

    // slotBit walks over the lowest bit of each slot; after the loop it is one slot past the end.
    int slotBit = 1;
    for (char c : pattern) {
      final int current = table.get(c);
      table.set(c, countingMatches ? current + slotBit : current & ~slotBit);
      slotBit <<= b;
    }

    return new Object[] {
      table,
      type,
      Integer.valueOf(slotBit - 1),
      Integer.valueOf(ovmask),
      Integer.valueOf(b),
      Integer.valueOf(lim)
    };
  }
Example #5
0
  /**
   * Pre-processes the pattern into a {@link CharIntMap} of bit masks.
   *
   * <p>Only the first 32 characters of the pattern are used; anything beyond that is ignored, which
   * may lead to unexpected results. The map is obtained from
   * {@code createCharIntMap(pattern, end, 0)}, where {@code end} is the number of characters used.
   * The last used character contributes bit 0 to its entry, the one before it bit 1, and so on.
   *
   * @param pattern the pattern
   * @return the resulting {@link CharIntMap}
   * @see com.eaio.stringsearch.StringSearch#processChars(char[])
   */
  @Override
  public Object processChars(char[] pattern) {
    final int used = Math.min(pattern.length, 32);
    final CharIntMap masks = createCharIntMap(pattern, used, 0);

    for (int shift = 0; shift < used; shift++) {
      final char c = pattern[used - 1 - shift];
      masks.set(c, masks.get(c) | (1 << shift));
    }

    return masks;
  }
  /**
   * Builds the table of shift distances for the pattern.
   *
   * <p>Returns {@code null} for patterns of length 1 or 2. For every other length the map is
   * obtained from {@code createCharIntMap(pattern, pattern.length)}, and each character except the
   * last one is then assigned its distance to the last pattern index. A character that occurs more
   * than once keeps the distance of its rightmost such occurrence.
   *
   * @param pattern the pattern
   * @return a {@link CharIntMap}, or {@code null} for patterns of length 1 or 2
   * @see com.eaio.stringsearch.StringSearch#processChars(char[])
   */
  @Override
  public Object processChars(char[] pattern) {
    final int length = pattern.length;
    if (length == 1 || length == 2) {
      return null;
    }

    final CharIntMap shifts = createCharIntMap(pattern, length);
    final int lastIndex = length - 1;

    for (int index = 0; index < lastIndex; index++) {
      shifts.set(pattern[index], lastIndex - index);
    }

    return shifts;
  }
Example #7
0
  /**
   * Pre-processes a pattern that may contain the wildcard character {@code w}.
   *
   * <p>Only the first 32 characters of the pattern are used; anything beyond that is ignored, which
   * may lead to unexpected results. Bits are numbered from the end of the used part: the last used
   * character corresponds to bit 0. First, the bits of all wildcard positions are collected and
   * passed to {@code createCharIntMap(pattern, bits)}. Then every used character, wildcards
   * included, has the bit of its own position OR-ed into its entry.
   *
   * @param pattern the <code>char</code> array containing the pattern, may not be <code>null</code>
   * @param w the wildcard character
   * @return a {@link CharIntMap}.
   */
  public Object processChars(char[] pattern, char w) {
    final int used = Math.min(pattern.length, 32);

    int wildcardBits = 0;
    for (int index = 0; index < used; index++) {
      if (pattern[index] == w) {
        wildcardBits |= 1 << (used - 1 - index);
      }
    }

    final CharIntMap masks = createCharIntMap(pattern, wildcardBits);

    for (int shift = 0; shift < used; shift++) {
      final char c = pattern[used - 1 - shift];
      masks.set(c, masks.get(c) | (1 << shift));
    }

    return masks;
  }
  /**
   * Finds the first occurrence of {@code pattern} in {@code text} that starts at or after {@code
   * textStart} and ends before {@code textEnd}.
   *
   * <p>{@code textEnd} is clamped to {@code text.length} on every path, so a range that extends
   * past the array does not cause an out-of-bounds read. Patterns of length 1 and 2 are handled by
   * plain loops and do not touch {@code processed}. Longer patterns are compared right to left, and
   * the window is advanced by the distance stored in {@code processed} for the text character
   * under the window's last position. The scan begins at the first window inside the requested
   * range instead of at the start of the array.
   *
   * <p>For an empty pattern the comparison is vacuous: the start index (a negative {@code
   * textStart} counts as 0) is returned whenever it does not exceed the clamped end.
   *
   * @param text the characters to search in
   * @param textStart index of the first character that may belong to a match
   * @param textEnd index just past the last character that may belong to a match
   * @param pattern the pattern to look for
   * @param processed the {@link CharIntMap} of shift distances; unused for patterns of length 1
   *     and 2
   * @return the index of the first match, or {@code -1} if there is none
   * @see com.eaio.stringsearch.StringSearch#searchChars(char[], int, int, char[], Object)
   */
  @Override
  public int searchChars(
      char[] text, int textStart, int textEnd, char[] pattern, Object processed) {

    final int limit = Math.min(text.length, textEnd);

    // Unrolled fast paths for patterns of length 1 and 2. Suggested by someone who doesn't want to
    // be named.

    if (pattern.length == 1) {
      final char only = pattern[0];
      for (int n = textStart; n < limit; n++) {
        if (text[n] == only) {
          return n;
        }
      }
      return -1;
    }

    if (pattern.length == 2) {
      final int nLimit = limit - 1;
      for (int n = textStart; n < nLimit; n++) {
        if (text[n] == pattern[0] && text[n + 1] == pattern[1]) {
          return n;
        }
      }
      return -1;
    }

    final CharIntMap skip = (CharIntMap) processed;
    final int lengthMinusOne = pattern.length - 1;

    // A negative textStart is treated as 0, which is how the general path has always behaved.
    final int start = Math.max(textStart, 0);

    // k is the text index aligned with the last pattern character. Because k never drops below
    // start + lengthMinusOne, the backwards comparison cannot run past the start of the range.
    // NOTE(review): assumes skip.get(...) always yields a positive distance - confirm against the
    // table built by processChars.
    for (int k = start + lengthMinusOne; k < limit; k += skip.get(text[k])) {
      int i = k;
      int j = lengthMinusOne;
      while (j >= 0 && text[i] == pattern[j]) {
        --i;
        --j;
      }
      if (j < 0) {
        return i + 1;
      }
    }

    return -1;
  }
  /**
   * Searches {@code text} from {@code textStart} (inclusive) to {@code textEnd} (exclusive) using
   * the data prepared by {@code processChars(char[], int)}.
   *
   * <p>{@code processed} must be an {@code Object[]} holding, in this order: the {@link
   * CharIntMap}, the mode ({@code MATCH} or another value), and as {@link Integer}s the state mask,
   * the overflow mask, the number of bits per pattern position and the comparison limit.
   *
   * @param text the characters to search in
   * @param textStart index of the first character to examine
   * @param textEnd index just past the last character to examine
   * @param pattern the pattern; only its length is read here
   * @param processed the pre-processed pattern data, see above
   * @param k the threshold; only used to compute the second element of the result in {@code MATCH}
   *     mode
   * @return a two-element array. On success the first element is the start index of the hit; the
   *     second is {@code pattern.length - k} in {@code MATCH} mode, otherwise the state shifted
   *     right to the last pattern position. If nothing is found, <code>{-1, 0}</code> is returned.
   * @see com.eaio.stringsearch.MismatchSearch#searchChars(char[], int, int, char[], Object, int)
   */
  public int[] searchChars(
      char[] text, int textStart, int textEnd, char[] pattern, Object processed, int k) {

    final Object[] settings = (Object[]) processed;
    final CharIntMap table = (CharIntMap) settings[0];
    final boolean countingMatches = settings[1] == MATCH;
    final int mask = ((Integer) settings[2]).intValue();
    final int ovmask = ((Integer) settings[3]).intValue();
    final int b = ((Integer) settings[4]).intValue();
    final int lim = ((Integer) settings[5]).intValue();

    int state = countingMatches ? 0 : (mask & ~ovmask);
    int overflow = countingMatches ? 0 : ovmask;

    for (int pos = textStart; pos < textEnd; pos++) {
      final int advanced = ((state << b) + table.get(text[pos])) & mask;
      overflow = ((overflow << b) | (advanced & ovmask)) & mask;
      // The overflow bits are tracked separately, so they are stripped from the state itself.
      state = advanced & ~ovmask;

      final int combined = state | overflow;
      if (countingMatches) {
        if (combined >= lim) {
          return new int[] {pos - pattern.length + 1, pattern.length - k};
        }
      } else if (combined < lim) {
        return new int[] {pos - pattern.length + 1, state >> ((pattern.length - 1) * b)};
      }
    }

    return new int[] {-1, 0};
  }