/** * Calculates the safe shifts to use if searching backwards. A safe shift is either the length * of the sequence, if the byte does not appear in the {@link SequenceMatcher}, or the shortest * distance it appears from the beginning of the matcher, with zero being the value of the first * position in the sequence. */ @Override public SearchInfo create() { // Get info about the matcher: final SequenceMatcher sequence = getMatcher(); final int sequenceLength = sequence.length(); // Create the search info object fields final int lastPosition = sequenceLength - 1; final ByteMatcher byteMatcher = sequence.getMatcherForPosition(0); final SequenceMatcher verifier = (lastPosition == 0) ? null : sequence.subsequence(1, sequenceLength); // Set the default shift to the length of the sequence final int[] shifts = new int[256]; Arrays.fill(shifts, sequenceLength); // Now set specific byte shifts for the bytes actually in // the sequence itself. The shift is the position in the sequence, // but we do not create a shift for the first position 0. for (int sequencePos = lastPosition; sequencePos > 0; sequencePos--) { final ByteMatcher aMatcher = sequence.getMatcherForPosition(sequencePos); final byte[] matchingBytes = aMatcher.getMatchingBytes(); for (final byte b : matchingBytes) { shifts[b & 0xFF] = sequencePos; } } return new SearchInfo(shifts, byteMatcher, verifier); }
/** * Calculates the safe shifts to use if searching forwards. A safe shift is either the length of * the sequence, if the byte does not appear in the {@link SequenceMatcher}, or the shortest * distance it appears from the end of the matcher. */ @Override public SearchInfo create() { // Get info about the matcher: final SequenceMatcher sequence = getMatcher(); final int sequenceLength = sequence.length(); // Create the search info object fields: final int lastPosition = sequenceLength - 1; final ByteMatcher byteMatcher = sequence.getMatcherForPosition(lastPosition); final SequenceMatcher verifier = (lastPosition == 0) ? AnyByteMatcher.ANY_BYTE_MATCHER : sequence.subsequence(0, lastPosition); // Set the default shift to the length of the sequence for all possible byte values: final int[] shifts = new int[256]; Arrays.fill(shifts, sequenceLength); // Now set specific shifts for the bytes actually in // the sequence itself. The shift is the distance of a position // from the end of the sequence, but we do not create a shift for // the very last position. for (int sequencePos = 0; sequencePos < lastPosition; sequencePos++) { final ByteMatcher aMatcher = sequence.getMatcherForPosition(sequencePos); final byte[] matchingBytes = aMatcher.getMatchingBytes(); final int distanceFromEnd = sequenceLength - sequencePos - 1; for (final byte b : matchingBytes) { shifts[b & 0xFF] = distanceFromEnd; } } return new SearchInfo(shifts, byteMatcher, verifier); }
/**
 * {@inheritDoc}
 *
 * <p>Searches backwards window by window. Shifting is driven by the byte array of each window,
 * while verification of a candidate match is delegated to the verifier reading through the
 * {@code WindowReader}.
 */
@Override
protected List<SearchResult<SequenceMatcher>> doSearchBackwards(
    final WindowReader reader, final long fromPosition, final long toPosition)
    throws IOException {

  final SearchInfo searchInfo = backwardInfo.get();
  final int[] shiftTable = searchInfo.shifts;
  final ByteMatcher firstPositionMatcher = searchInfo.matcher;
  final SequenceMatcher remainder = searchInfo.verifier;

  long searchPosition = fromPosition;
  Window window;
  while (searchPosition >= toPosition
      && (window = reader.getWindow(searchPosition)) != null) {

    final byte[] array = window.getArray();
    final int arrayStart = reader.getWindowOffset(searchPosition);

    // Lowest array index worth examining: either where toPosition falls in this window,
    // or the start of the array if toPosition lies before this window.
    final long toPositionOffset = toPosition - window.getWindowPosition();
    final int arrayLimit = (int) Math.max(toPositionOffset, 0L);

    int arrayPos = arrayStart;

    WINDOW_SEARCH:
    while (arrayPos >= arrayLimit) {

      // Skip backwards until a byte is found which can match the first position
      // of the sequence, giving up on this window if the limit is passed.
      byte candidate = array[arrayPos];
      while (!firstPositionMatcher.matches(candidate)) {
        arrayPos -= shiftTable[candidate & 0xff];
        if (arrayPos < arrayLimit) {
          break WINDOW_SEARCH;
        }
        candidate = array[arrayPos];
      }

      // Translate the array index back into a position in the reader, then verify.
      // A null verifier means the sequence is one position long, so it has already matched.
      final long candidatePosition = searchPosition - (arrayStart - arrayPos);
      if (remainder == null || remainder.matches(reader, candidatePosition + 1)) {
        return SearchUtils.singleResult(candidatePosition, matcher);
      }

      // Verification failed: move on by the safe shift for the byte just examined.
      arrayPos -= shiftTable[candidate & 0xff];
    }

    // Carry the distance covered in this window over to the overall search position.
    searchPosition -= (arrayStart - arrayPos);
  }

  return SearchUtils.noResults();
}
/** {@inheritDoc} */ @Override public List<SearchResult<SequenceMatcher>> searchForwards( final byte[] bytes, final int fromPosition, final int toPosition) { // Get the objects needed to search: final SearchInfo info = forwardInfo.get(); final int[] safeShifts = info.shifts; final ByteMatcher endOfSequence = info.matcher; final SequenceMatcher verifier = info.verifier; // Determine a safe position to start searching at. final int lastMatcherPosition = getMatcher().length() - 1; int searchPosition = fromPosition > 0 ? fromPosition + lastMatcherPosition : lastMatcherPosition; // Calculate safe bounds for the end of the search: final int lastPossiblePosition = bytes.length - 1; final int lastPossibleSearchPosition = toPosition + lastMatcherPosition; final int finalPosition = lastPossibleSearchPosition < lastPossiblePosition ? lastPossibleSearchPosition : lastPossiblePosition; // Search forwards: while (searchPosition <= finalPosition) { // Shift forwards until we match the last position in the sequence, // or we run out of search space (in which case just return not found). byte currentByte = bytes[searchPosition]; while (!endOfSequence.matches(currentByte)) { searchPosition += safeShifts[currentByte & 0xff]; if (searchPosition > finalPosition) { return SearchUtils.noResults(); } currentByte = bytes[searchPosition]; } // The last byte matched - verify there is a complete match: final int startMatchPosition = searchPosition - lastMatcherPosition; if (verifier.matchesNoBoundsCheck(bytes, startMatchPosition)) { return SearchUtils.singleResult(startMatchPosition, matcher); // match found. } // No match was found - shift forward by the shift for the current byte: searchPosition += safeShifts[currentByte & 0xff]; } return SearchUtils.noResults(); }
/** {@inheritDoc} */ @Override public List<SearchResult<SequenceMatcher>> searchBackwards( final byte[] bytes, final int fromPosition, final int toPosition) { // Get objects needed for the search: final SearchInfo info = backwardInfo.get(); final int[] safeShifts = info.shifts; final ByteMatcher startOfSequence = info.matcher; final SequenceMatcher verifier = info.verifier; // Calculate safe bounds for the start of the search: final int firstPossiblePosition = bytes.length - getMatcher().length(); int searchPosition = fromPosition < firstPossiblePosition ? fromPosition : firstPossiblePosition; // Calculate safe bounds for the end of the search: final int lastPosition = toPosition > 0 ? toPosition : 0; // Search backwards: while (searchPosition >= lastPosition) { // Shift backwards until we match the first position in the // sequence, or we run out of search space: byte currentByte = bytes[searchPosition]; while (!startOfSequence.matches(currentByte)) { searchPosition -= safeShifts[currentByte & 0xFF]; if (searchPosition < lastPosition) { return SearchUtils.noResults(); } currentByte = bytes[searchPosition]; } // The first byte matched - verify there is a complete match. // There is only a verifier if the sequence length was greater than one; // if the sequence is only one in length, we have already found it. if (verifier == null || verifier.matchesNoBoundsCheck(bytes, searchPosition + 1)) { return SearchUtils.singleResult(searchPosition, matcher); // match found. } // No match was found - shift backward by the shift for the current byte: searchPosition -= safeShifts[currentByte & 0xff]; } return SearchUtils.noResults(); }
/** * Searches forward using the Boyer Moore Horspool algorithm, using byte arrays from Windows to * handle shifting, and the WindowReader interface on the SequenceMatcher to verify whether a * match exists. */ @Override protected List<SearchResult<SequenceMatcher>> doSearchForwards( final WindowReader reader, final long fromPosition, final long toPosition) throws IOException { // Get the objects needed to search: final SearchInfo info = forwardInfo.get(); final int[] safeShifts = info.shifts; final ByteMatcher endOfSequence = info.matcher; final SequenceMatcher verifier = info.verifier; // Initialise window search: final long endSequencePosition = matcher.length() - 1; final long finalPosition = toPosition + endSequencePosition; long searchPosition = fromPosition + endSequencePosition; // While there is a window to search in: Window window; while (searchPosition <= finalPosition && (window = reader.getWindow(searchPosition)) != null) { // Initialise array search: final byte[] array = window.getArray(); final int arrayStartPosition = reader.getWindowOffset(searchPosition); final int arrayEndPosition = window.length() - 1; final int lastMatcherPosition = matcher.length() - 1; final long distanceToEnd = finalPosition - window.getWindowPosition() + lastMatcherPosition; final int lastSearchPosition = distanceToEnd < arrayEndPosition ? (int) distanceToEnd : arrayEndPosition; int arraySearchPosition = arrayStartPosition; // Search forwards in this array: ARRAY_SEARCH: while (arraySearchPosition <= lastSearchPosition) { // Shift forwards until we match the last position in the sequence, // or we run out of search space. byte currentByte = array[arraySearchPosition]; while (!endOfSequence.matches(currentByte)) { arraySearchPosition += safeShifts[currentByte & 0xff]; if (arraySearchPosition > lastSearchPosition) { break ARRAY_SEARCH; // outside the array, move on. 
} currentByte = array[arraySearchPosition]; } // The last byte matched - verify there is a complete match: final long arrayBytesSearched = arraySearchPosition - arrayStartPosition; final long matchPosition = searchPosition + arrayBytesSearched - endSequencePosition; if (verifier.matches(reader, matchPosition)) { return SearchUtils.singleResult(matchPosition, matcher); // match found. } // No match was found - shift forward by the shift for the current byte: arraySearchPosition += safeShifts[currentByte & 0xff]; } // No match was found in this array - calculate the current search position: searchPosition += arraySearchPosition - arrayStartPosition; } return SearchUtils.noResults(); }