/**
 * Builds the lookup table and the bit-field parameters used by the matching
 * {@code searchChars} method for approximate searching.
 *
 * <p>If {@code 2 * k} exceeds the pattern length, the table is built in {@code MATCH} mode and
 * the effective count becomes {@code pattern.length - k}; otherwise {@code MISMATCH} mode is used
 * with the count {@code k} unchanged. Every pattern position gets a field of
 * {@code b = clog2(count + 1) + 1} bits inside a single {@code int}.
 *
 * @param pattern the pattern, may not be {@code null}
 * @param k the count passed by the caller (see the mode selection above)
 * @return an {@code Object[]} holding, in this order: the {@link CharIntMap}, the mode marker
 *     ({@code MATCH} or {@code MISMATCH}), the state mask, the overflow mask, the field width
 *     {@code b} and the comparison limit (the last four boxed as {@link Integer})
 * @throws IllegalArgumentException if the pattern length is larger than {@code 31 / b}
 * @see com.eaio.stringsearch.MismatchSearch#processChars(char[], int)
 */
public Object processChars(char[] pattern, int k) {
  final int m = pattern.length;

  // Pick the mode and the count that each bit field has to be able to hold.
  Object type = MISMATCH;
  int count = k;
  if ((k << 1) > m) {
    type = MATCH;
    count = m - k;
  }
  final boolean matchMode = type == MATCH;

  final int b = clog2(count + 1) + 1;
  if (m > (31 / b)) {
    throw new IllegalArgumentException();
  }

  /* Preprocessing */

  // Shift distance of the field that belongs to the last pattern position.
  final int topShift = (m - 1) * b;

  // One marker bit at the top of every field.
  int ovmask = 0;
  for (int n = 0; n < m; n++) {
    ovmask = (ovmask << b) | (1 << (b - 1));
  }

  int lim = count << topShift;
  final CharIntMap t;
  if (matchMode) {
    t = createCharIntMap(pattern);
  } else {
    lim += 1 << topShift;
    // Default entry has the lowest bit of every field set.
    t = createCharIntMap(pattern, ovmask >> (b - 1));
  }

  // fieldBit walks over the lowest bit of each field, one field per pattern position.
  int fieldBit = 1;
  for (char c : pattern) {
    if (matchMode) {
      t.set(c, t.get(c) + fieldBit);
    } else {
      t.set(c, t.get(c) & ~fieldBit);
    }
    fieldBit <<= b;
  }

  return new Object[] {
    t,
    type,
    Integer.valueOf(fieldBit - 1),
    Integer.valueOf(ovmask),
    Integer.valueOf(b),
    Integer.valueOf(lim)
  };
}
/**
 * Searches {@code text} between {@code textStart} (inclusive) and {@code textEnd} (exclusive) for
 * the pattern described by {@code processed}. Each window is read from its last character
 * towards its first while a bit state is AND-ed with the per-character masks. At most the first
 * 32 pattern characters take part in the comparison.
 *
 * @param processed the {@link CharIntMap} built for {@code pattern}
 * @return the index of the first window that matches, or {@code -1} if there is none
 * @see com.eaio.stringsearch.StringSearch#searchChars(char[], int, int, char[], Object)
 */
@Override
public int searchChars(
    char[] text, int textStart, int textEnd, char[] pattern, Object processed) {
  final CharIntMap masks = (CharIntMap) processed;
  final int window = Math.min(pattern.length, 32);
  final int lastWindowStart = textEnd - window;

  int shift;
  for (int windowStart = textStart; windowStart <= lastWindowStart; windowStart += shift) {
    int offset = window - 1;
    int state = -1;
    shift = window;
    do {
      state &= masks.get(text[windowStart + offset]);
      if (state != 0) {
        if (offset == 0) {
          return windowStart;
        }
        // Remember the most recent offset at which the state was still alive; it becomes
        // the distance the window is moved by.
        shift = offset;
      }
      offset--;
      state <<= 1;
    } while (state != 0);
  }
  return -1;
}
/**
 * Scans {@code text} from {@code textStart} (inclusive) to {@code textEnd} (exclusive), shifting
 * a bit state left by one per character and OR-ing in that character's mask. A hit is reported
 * as soon as the state drops below the limit derived from the pattern length.
 *
 * @param processed either a {@link CharIntMap}, in which case {@code pattern.length} is used as
 *     the pattern length, or an {@code Object[]} whose element 0 is the {@link CharIntMap} and
 *     whose element 1 is an {@link Integer} with the pattern length to use
 * @return the index at which the match starts, or {@code -1} if there is none
 * @see com.eaio.stringsearch.StringSearch#searchChars(char[], int, int, char[], Object)
 */
public int searchChars(
    char[] text, int textStart, int textEnd, char[] pattern, Object processed) {
  int length = pattern.length;
  final CharIntMap masks;
  if (processed instanceof CharIntMap) {
    masks = (CharIntMap) processed;
  } else {
    final Object[] params = (Object[]) processed;
    masks = (CharIntMap) params[0];
    length = ((Integer) params[1]).intValue();
  }

  // All bits from position (length - 1) upwards are set; same value as ~((1 << (length - 1)) - 1).
  final int limit = -1 << (length - 1);

  int state = -1;
  int pos = textStart;
  while (pos < textEnd) {
    state = (state << 1) | masks.get(text[pos]);
    if (state < limit) {
      return pos - length + 1;
    }
    pos++;
  }
  return -1;
}
/**
 * Pre-processes the pattern. The pattern may not exceed 31 characters in length. If it does,
 * <b>only its first 31 characters</b> are processed, which might lead to unexpected results.
 *
 * <p>The map is created with all bits set as its default value; for every processed position
 * {@code i} the bit {@code i} is cleared in the entry of the character found there.
 *
 * @param pattern the pattern, may not be {@code null}
 * @return the resulting {@link CharIntMap}
 * @see StringSearch#processChars(char[])
 */
public Object processChars(char[] pattern) {
  final int limit = pattern.length < 31 ? pattern.length : 31;
  final CharIntMap masks = createCharIntMap(pattern, -1);

  int bit = 1;
  for (int pos = 0; pos < limit; pos++) {
    final char c = pattern[pos];
    masks.set(c, masks.get(c) & ~bit);
    bit <<= 1;
  }
  return masks;
}
/**
 * Pre-processes the pattern. The pattern may not exceed 32 characters in length. If it does,
 * <b>only its first 32 characters</b> are processed, which might lead to unexpected results.
 *
 * <p>The last processed character is assigned bit 0, the one before it bit 1, and so on; each
 * bit is OR-ed into the map entry of its character.
 *
 * @param pattern the pattern, may not be {@code null}
 * @return the resulting {@link CharIntMap}
 * @see com.eaio.stringsearch.StringSearch#processChars(char[])
 */
@Override
public Object processChars(char[] pattern) {
  final int limit = Math.min(pattern.length, 32);
  final CharIntMap masks = createCharIntMap(pattern, limit, 0);

  // Walk the bit positions upwards, which visits the pattern from its last processed
  // character back to its first.
  for (int bitIndex = 0; bitIndex < limit; bitIndex++) {
    final char c = pattern[limit - 1 - bitIndex];
    masks.set(c, masks.get(c) | (1 << bitIndex));
  }
  return masks;
}
/**
 * Pre-processes the pattern, treating {@code w} as a wildcard. The pattern may not exceed 32
 * characters in length. If it does, <b>only its first 32 characters</b> are processed, which
 * might lead to unexpected results.
 *
 * <p>The bits of all positions holding the wildcard are collected first and passed to
 * {@code createCharIntMap} as the second argument. Afterwards every processed position
 * (wildcard positions included) has its bit OR-ed into the entry of the character found there.
 * The last processed character corresponds to bit 0.
 *
 * @param pattern the <code>char</code> array containing the pattern, may not be <code>null</code>
 * @param w the wildcard character
 * @return a {@link CharIntMap}.
 */
public Object processChars(char[] pattern, char w) {
  final int limit = Math.min(pattern.length, 32);

  // Bits of every position at which the pattern contains the wildcard.
  int wildcardBits = 0;
  for (int pos = 0; pos < limit; pos++) {
    if (pattern[pos] == w) {
      wildcardBits |= 1 << (limit - pos - 1);
    }
  }

  final CharIntMap masks = createCharIntMap(pattern, wildcardBits);

  for (int bitIndex = 0; bitIndex < limit; bitIndex++) {
    final char c = pattern[limit - 1 - bitIndex];
    masks.set(c, masks.get(c) | (1 << bitIndex));
  }
  return masks;
}
/** @see com.eaio.stringsearch.StringSearch#searchChars(char[], int, int, char[], Object) */ @Override public int searchChars( char[] text, int textStart, int textEnd, char[] pattern, Object processed) { // Unrolled fast paths for patterns of length 1 and 2. Suggested by someone who doesn't want to // be named. if (pattern.length == 1) { final int nLimit = Math.min(text.length, textEnd); for (int n = textStart; n < nLimit; n++) { if (text[n] == pattern[0]) return n; } return -1; } else if (pattern.length == 2) { final int nLimit = Math.min(text.length, textEnd) - 1; for (int n = textStart; n < nLimit; n++) { if (text[n] == pattern[0]) { if (text[n + 1] == pattern[1]) return n; } } return -1; } CharIntMap skip = (CharIntMap) processed; int i, j, k; final int lengthMinusOne = pattern.length - 1; for (k = lengthMinusOne; k < textEnd; k += skip.get(text[k])) { for (j = lengthMinusOne, i = k; j >= 0 && text[i] == pattern[j] && i >= textStart; --j, --i) { // Blank. } if (j == -1) return ++i; } return -1; }
/**
 * Scans {@code text} from {@code textStart} (inclusive) to {@code textEnd} (exclusive) using the
 * table and parameters produced by {@code processChars(char[], int)}.
 *
 * <p>For every text character the state is shifted by one field width, the character's table
 * entry is added, and the marker bits selected by the overflow mask are moved from the state
 * into a separate overflow word. In {@code MATCH} mode a hit is reported once the combined
 * value reaches the limit; otherwise once it falls below the limit.
 *
 * @param processed the {@code Object[]} returned by {@code processChars(char[], int)}: the
 *     {@link CharIntMap}, the mode marker, the state mask, the overflow mask, the field width
 *     and the limit
 * @param k the count passed by the caller; only used to compute the second result element in
 *     {@code MATCH} mode
 * @return a two-element array. On a hit, element 0 is the index at which the occurrence starts
 *     and element 1 is {@code pattern.length - k} in {@code MATCH} mode, or the state shifted
 *     right by {@code (pattern.length - 1) * b} bits otherwise. Without a hit the result is
 *     <code>{-1, 0}</code>.
 * @see com.eaio.stringsearch.MismatchSearch#searchChars(char[], int, int, char[], Object, int)
 */
public int[] searchChars(
    char[] text, int textStart, int textEnd, char[] pattern, Object processed, int k) {
  final Object[] params = (Object[]) processed;
  final CharIntMap table = (CharIntMap) params[0];
  final Object type = params[1];
  final int mask = ((Integer) params[2]).intValue();
  final int ovmask = ((Integer) params[3]).intValue();
  final int b = ((Integer) params[4]).intValue();
  final int lim = ((Integer) params[5]).intValue();

  final boolean matchMode = type == MATCH;
  final int stateBits = ~ovmask;

  int state = matchMode ? 0 : (mask & stateBits);
  int overflow = matchMode ? 0 : ovmask;

  int pos = textStart;
  while (pos < textEnd) {
    state = ((state << b) + table.get(text[pos])) & mask;
    // Carry the marker bits over into the overflow word, then clear them in the state.
    overflow = ((overflow << b) | (state & ovmask)) & mask;
    state &= stateBits;

    final int combined = state | overflow;
    if (matchMode) {
      if (combined >= lim) {
        return new int[] {pos - pattern.length + 1, pattern.length - k};
      }
    } else if (combined < lim) {
      return new int[] {pos - pattern.length + 1, state >> ((pattern.length - 1) * b)};
    }
    pos++;
  }
  return new int[] {-1, 0};
}