Пример #1
0
    /**
     * Builds the {@link SearchInfo} used when searching backwards.
     *
     * <p>Every byte value starts with a shift equal to the length of the sequence. Each byte that
     * can match at a position other than the first is then given the smallest such position as its
     * shift. Position zero gets no entry in the table; its matcher is returned alongside the shifts.
     *
     * @return the shift table, the matcher for position zero, and a verifier covering positions one
     *     to the end of the sequence ({@code null} when the sequence has only one position)
     */
    @Override
    public SearchInfo create() {
      final SequenceMatcher toSearchFor = getMatcher();
      final int length = toSearchFor.length();

      // The matcher tested against each candidate byte, and the verifier for everything after it.
      final ByteMatcher firstMatcher = toSearchFor.getMatcherForPosition(0);
      final SequenceMatcher remainder;
      if (length - 1 == 0) {
        remainder = null; // a single-position sequence has nothing left to verify.
      } else {
        remainder = toSearchFor.subsequence(1, length);
      }

      // A byte that appears nowhere in the sequence allows a shift of the whole length.
      final int[] safeShifts = new int[256];
      Arrays.fill(safeShifts, length);

      // Walk from the last position down to position 1, so that when a byte can match at
      // several positions the one nearest the start is written last and wins.
      int position = length - 1;
      while (position > 0) {
        final ByteMatcher atPosition = toSearchFor.getMatcherForPosition(position);
        for (final byte matched : atPosition.getMatchingBytes()) {
          safeShifts[matched & 0xFF] = position;
        }
        position--;
      }

      return new SearchInfo(safeShifts, firstMatcher, remainder);
    }
Пример #2
0
    /**
     * Builds the {@link SearchInfo} used when searching forwards.
     *
     * <p>Every byte value starts with a shift equal to the length of the sequence. Each byte that
     * can match at a position other than the last is then given its smallest distance from the
     * last position as its shift. The last position gets no entry in the table; its matcher is
     * returned alongside the shifts.
     *
     * @return the shift table, the matcher for the last position, and a verifier covering all
     *     positions before the last ({@code AnyByteMatcher.ANY_BYTE_MATCHER} when the sequence has
     *     only one position)
     */
    @Override
    public SearchInfo create() {
      final SequenceMatcher toSearchFor = getMatcher();
      final int length = toSearchFor.length();
      final int finalIndex = length - 1;

      // The matcher tested against each candidate byte, and the verifier for everything before it.
      final ByteMatcher lastMatcher = toSearchFor.getMatcherForPosition(finalIndex);
      final SequenceMatcher remainder;
      if (finalIndex == 0) {
        remainder = AnyByteMatcher.ANY_BYTE_MATCHER;
      } else {
        remainder = toSearchFor.subsequence(0, finalIndex);
      }

      // A byte that appears nowhere in the sequence allows a shift of the whole length.
      final int[] safeShifts = new int[256];
      Arrays.fill(safeShifts, length);

      // Count the distance from the last position downwards, which visits the positions from
      // the first to the last-but-one. When a byte can match at several positions, the one
      // nearest the end is written last and wins.
      for (int distance = finalIndex; distance > 0; distance--) {
        final ByteMatcher atPosition = toSearchFor.getMatcherForPosition(finalIndex - distance);
        for (final byte matched : atPosition.getMatchingBytes()) {
          safeShifts[matched & 0xFF] = distance;
        }
      }

      return new SearchInfo(safeShifts, lastMatcher, remainder);
    }
Пример #3
0
  /**
   * {@inheritDoc}
   *
   * <p>Shifting is done directly on the byte array of each window, keyed on the byte aligned with
   * the first position of the sequence. Once that byte matches, the rest of the sequence is
   * verified through the {@code reader}, so a match may extend beyond the current window.
   */
  @Override
  protected List<SearchResult<SequenceMatcher>> doSearchBackwards(
      final WindowReader reader, final long fromPosition, final long toPosition)
      throws IOException {

    final SearchInfo searchInfo = backwardInfo.get();
    final int[] shiftFor = searchInfo.shifts;
    final ByteMatcher firstMatcher = searchInfo.matcher;
    final SequenceMatcher remainder = searchInfo.verifier;

    long position = fromPosition;
    while (position >= toPosition) {
      final Window window = reader.getWindow(position);
      if (window == null) {
        break; // no window at this position - nothing more to search.
      }

      // Translate the search bounds into indexes of this window's array:
      final byte[] windowBytes = window.getArray();
      final int startIndex = reader.getWindowOffset(position);
      final int lowestIndex = (int) Math.max(toPosition - window.getWindowPosition(), 0L);

      int index = startIndex;
      while (index >= lowestIndex) {
        final byte candidate = windowBytes[index];
        if (firstMatcher.matches(candidate)) {
          // The first position matched - check the rest of the sequence via the reader.
          // There is no verifier when the sequence is a single position long.
          final long matchPosition = position - (startIndex - index);
          if (remainder == null || remainder.matches(reader, matchPosition + 1)) {
            return SearchUtils.singleResult(matchPosition, matcher);
          }
        }
        // Either the byte did not match or verification failed: shift by this byte's distance.
        index -= shiftFor[candidate & 0xff];
      }

      // Carry the distance covered in this array over to the absolute search position:
      position -= (startIndex - index);
    }

    return SearchUtils.noResults();
  }
Пример #4
0
  /**
   * {@inheritDoc}
   *
   * <p>A {@code fromPosition} below zero is treated as zero, and {@code toPosition} is limited to
   * the last position at which the whole sequence still fits inside {@code bytes}. Both bounds are
   * clamped as match start positions before the offset to the last position of the sequence is
   * added, so large arguments such as {@link Integer#MAX_VALUE} cannot overflow.
   */
  @Override
  public List<SearchResult<SequenceMatcher>> searchForwards(
      final byte[] bytes, final int fromPosition, final int toPosition) {

    // Get the objects needed to search:
    final SearchInfo info = forwardInfo.get();
    final int[] safeShifts = info.shifts;
    final ByteMatcher endOfSequence = info.matcher;
    final SequenceMatcher verifier = info.verifier;

    // Clamp the range of positions at which a match may start. Doing this before adding the
    // sequence offset keeps every sum below within the bounds of the array.
    final int sequenceLength = getMatcher().length();
    final int lastMatcherPosition = sequenceLength - 1;
    final int firstStartPosition = fromPosition > 0 ? fromPosition : 0;
    final int lastFittingPosition = bytes.length - sequenceLength;
    final int lastStartPosition =
        toPosition < lastFittingPosition ? toPosition : lastFittingPosition;
    if (firstStartPosition > lastStartPosition) {
      return SearchUtils.noResults(); // nowhere for the sequence to start.
    }

    // The search examines the byte aligned with the last position of the sequence:
    int searchPosition = firstStartPosition + lastMatcherPosition;
    final int finalPosition = lastStartPosition + lastMatcherPosition;

    // Search forwards:
    while (searchPosition <= finalPosition) {

      // Shift forwards until we match the last position in the sequence,
      // or we run out of search space (in which case just return not found).
      byte currentByte = bytes[searchPosition];
      while (!endOfSequence.matches(currentByte)) {
        searchPosition += safeShifts[currentByte & 0xff];
        if (searchPosition > finalPosition) {
          return SearchUtils.noResults();
        }
        currentByte = bytes[searchPosition];
      }

      // The last byte matched - verify there is a complete match:
      final int startMatchPosition = searchPosition - lastMatcherPosition;
      if (verifier.matchesNoBoundsCheck(bytes, startMatchPosition)) {
        return SearchUtils.singleResult(startMatchPosition, matcher); // match found.
      }

      // No match was found - shift forward by the shift for the current byte:
      searchPosition += safeShifts[currentByte & 0xff];
    }

    return SearchUtils.noResults();
  }
Пример #5
0
  /**
   * {@inheritDoc}
   *
   * <p>The search starts no later than the last position at which the whole sequence fits inside
   * {@code bytes}, and a {@code toPosition} below zero is treated as zero. Shifts are keyed on the
   * byte aligned with the first position of the sequence.
   */
  @Override
  public List<SearchResult<SequenceMatcher>> searchBackwards(
      final byte[] bytes, final int fromPosition, final int toPosition) {

    final SearchInfo searchInfo = backwardInfo.get();
    final int[] shiftFor = searchInfo.shifts;
    final ByteMatcher firstMatcher = searchInfo.matcher;
    final SequenceMatcher remainder = searchInfo.verifier;

    // Clamp both ends of the search to positions that are valid in the array:
    int position = Math.min(fromPosition, bytes.length - getMatcher().length());
    final int lowestPosition = Math.max(toPosition, 0);

    while (position >= lowestPosition) {
      final byte candidate = bytes[position];

      // Once the first position matches, the rest of the sequence must match too. There is no
      // verifier when the sequence is a single position long, as the match is then complete.
      if (firstMatcher.matches(candidate)
          && (remainder == null || remainder.matchesNoBoundsCheck(bytes, position + 1))) {
        return SearchUtils.singleResult(position, matcher);
      }

      // Either the byte did not match or verification failed: shift by this byte's distance.
      position -= shiftFor[candidate & 0xFF];
    }

    return SearchUtils.noResults();
  }
Пример #6
0
  /**
   * Searches forward using the Boyer Moore Horspool algorithm, using byte arrays from Windows to
   * handle shifting, and the WindowReader interface on the SequenceMatcher to verify whether a
   * match exists.
   *
   * <p>Search positions refer to the byte aligned with the last position of the sequence, so the
   * search covers {@code fromPosition + length - 1} up to {@code toPosition + length - 1}, where
   * {@code length} is the length of the sequence. A match is therefore only reported if it starts
   * at or before {@code toPosition}.
   *
   * @param reader the reader supplying the windows to search and used to verify a match
   * @param fromPosition the first position at which a match may start
   * @param toPosition the last position at which a match may start
   * @return the result built by {@code SearchUtils.singleResult} for the first match found, or
   *     {@code SearchUtils.noResults()} if there is none
   * @throws IOException if a call on the {@code reader} or the verifier throws it
   */
  @Override
  protected List<SearchResult<SequenceMatcher>> doSearchForwards(
      final WindowReader reader, final long fromPosition, final long toPosition)
      throws IOException {

    // Get the objects needed to search:
    final SearchInfo info = forwardInfo.get();
    final int[] safeShifts = info.shifts;
    final ByteMatcher endOfSequence = info.matcher;
    final SequenceMatcher verifier = info.verifier;

    // Initialise window search:
    final long endSequencePosition = matcher.length() - 1;
    final long finalPosition = toPosition + endSequencePosition;
    long searchPosition = fromPosition + endSequencePosition;

    // While there is a window to search in:
    Window window;
    while (searchPosition <= finalPosition && (window = reader.getWindow(searchPosition)) != null) {

      // Initialise array search:
      final byte[] array = window.getArray();
      final int arrayStartPosition = reader.getWindowOffset(searchPosition);
      final int arrayEndPosition = window.length() - 1;
      // finalPosition already includes the offset to the last position of the sequence, so it
      // must not be added a second time when translating the bound into an array index.
      final long distanceToEnd = finalPosition - window.getWindowPosition();
      final int lastSearchPosition =
          distanceToEnd < arrayEndPosition ? (int) distanceToEnd : arrayEndPosition;
      int arraySearchPosition = arrayStartPosition;

      // Search forwards in this array:
      ARRAY_SEARCH:
      while (arraySearchPosition <= lastSearchPosition) {

        // Shift forwards until we match the last position in the sequence,
        // or we run out of search space.
        byte currentByte = array[arraySearchPosition];
        while (!endOfSequence.matches(currentByte)) {
          arraySearchPosition += safeShifts[currentByte & 0xff];
          if (arraySearchPosition > lastSearchPosition) {
            break ARRAY_SEARCH; // outside the array, move on.
          }
          currentByte = array[arraySearchPosition];
        }

        // The last byte matched - verify there is a complete match:
        final long arrayBytesSearched = arraySearchPosition - arrayStartPosition;
        final long matchPosition = searchPosition + arrayBytesSearched - endSequencePosition;
        if (verifier.matches(reader, matchPosition)) {
          return SearchUtils.singleResult(matchPosition, matcher); // match found.
        }

        // No match was found - shift forward by the shift for the current byte:
        arraySearchPosition += safeShifts[currentByte & 0xff];
      }

      // No match was found in this array - calculate the current search position:
      searchPosition += arraySearchPosition - arrayStartPosition;
    }

    return SearchUtils.noResults();
  }