/**
   * Runs {@code regex} against this document's text and converts every accepted hit into
   * highlight patterns.
   *
   * <p>The text is loaded through {@code util.loadTextFromId(url.toExternalForm())} with every
   * whitespace run collapsed to a single space. The regex is first passed through {@code
   * interpolateTextMarksIntoRegex} and {@code util.substituteDiacritics}. Each hit is widened by
   * the lengths of the strings returned by {@code getNegatedPrecedingCharacters} and {@code
   * getNegatedFollowingChars}, loosened (brackets, braces, parentheses and dots become the
   * any-character wildcard), split on "number followed by a dot" tokens, and each remaining
   * fragment is compiled with whitespace relaxed to {@code \s+}.
   *
   * <p>Side effects: {@code tempRegex} is overwritten with the loaded text, {@code "<br><br>"} and
   * the final regex; every fragment that is compiled successfully is appended to {@code
   * highlightWords}.
   *
   * <p>Fragments that are empty after trimming are skipped (an empty pattern would match at every
   * position). Fragments that are not valid regular expressions - document text is only partially
   * neutralised, so a fragment may begin with a quantifier such as {@code *} - are logged and
   * skipped instead of aborting the whole search.
   *
   * @param regex the search expression to apply to the document text
   * @return the compiled fragment patterns in the order encountered; empty when nothing matched
   */
  Pattern[] matchRegexToDocument(String regex) {

    // One flag mask shared by the search pattern and every fragment pattern.
    final int flags =
        Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.UNIX_LINES | Pattern.DOTALL;

    regex = interpolateTextMarksIntoRegex(regex);
    List<Pattern> patterns = new ArrayList<Pattern>();
    String fullText = util.loadTextFromId(url.toExternalForm()).replaceAll("\\s+", " ");
    tempRegex = fullText;
    String negatedPrecedingString = getNegatedPrecedingCharacters(regex);
    String negatedFollowingString = getNegatedFollowingChars(regex);
    int precedingIndexAdjustment = negatedPrecedingString.length();
    int followingIndexAdjustment = negatedFollowingString.length();
    regex = util.substituteDiacritics(regex);
    tempRegex += "<br><br>" + regex;
    Matcher matcher = Pattern.compile(regex, flags).matcher(fullText);

    while (matcher.find()) {
      // Widen the hit on both sides, clamped to the bounds of the text.
      int startIndex = Math.max(0, matcher.start() - precedingIndexAdjustment);
      int endIndex = Math.min(fullText.length(), matcher.end() + followingIndexAdjustment);
      String found = fullText.substring(startIndex, endIndex);
      found = anchorAssertionAtStart(found, precedingIndexAdjustment);
      found = anchorAssertionAtEnd(found, followingIndexAdjustment);

      // NOTE(review): the preceding check compares against a prefix that is one character longer
      // than negatedPrecedingString, while the following check uses a suffix of exactly matching
      // length. Unless anchorAssertionAtStart compensates, the preceding comparison can never be
      // equal - confirm whether "+ 1" is intended. Left unchanged here.
      if ((precedingIndexAdjustment == 0
              || !negatedPrecedingString.equals(found.substring(0, precedingIndexAdjustment + 1)))
          && (followingIndexAdjustment == 0
              || !negatedFollowingString.equals(
                  found.substring(found.length() - followingIndexAdjustment)))) {

        found = found.replaceAll("([()\\[\\]{}\\.])", ".");
        found = found.replaceAll("\\s{2,}", "\\\\b");
        for (String rawBit : found.split("[\\d]+\\.")) {
          String fbit = rawBit.trim().replaceAll("\\s", "\\\\s+");
          if (fbit.isEmpty()) {
            // split() yields an empty piece when the hit starts with a "<digits>." token; an
            // empty regex matches everywhere and is useless for highlighting.
            continue;
          }
          Pattern foundPattern;
          try {
            foundPattern = Pattern.compile(fbit, flags);
          } catch (java.util.regex.PatternSyntaxException e) {
            // Only ()[]{}. were neutralised above, so other metacharacters present in the
            // document text can still produce an invalid expression. Drop just this fragment.
            logger.error("Skipping highlight fragment that is not a valid regex: " + fbit, e);
            continue;
          }
          highlightWords.add(fbit);
          patterns.add(foundPattern);
        }
      }
    }

    return patterns.toArray(new Pattern[0]);
  }
  /**
   * Builds the keyword-in-context table row for this document.
   *
   * <p>The document text is loaded via {@code util.loadTextFromId} and handed to {@code
   * util.highlightMatches} together with {@code highlightTerms}. Each returned line is written
   * into a single {@code <tr class="result-text">} row, followed by a {@code <br/>}, after the
   * circled-a marker character and any whitespace around it have been stripped.
   *
   * <p>Any exception is logged and swallowed; whatever markup had been produced up to that point
   * (possibly none, possibly an unclosed row) is still returned.
   *
   * @return the HTML produced so far; the empty string when highlighting failed before any output
   */
  String getKWIC() {
    final StringBuilder row = new StringBuilder();
    try {
      final List<String> contextLines =
          util.highlightMatches(util.loadTextFromId(url.toExternalForm()), highlightTerms);
      // TODO: why is this character sneaking through when the user does a regex
      // word-boundary (\b) search?
      final Pattern strayMarker = Pattern.compile("\\s*ⓐ\\s*");

      row.append("<tr class=\"result-text\"><td class=\"kwic\" colspan=\"7\">");
      for (final String contextLine : contextLines) {
        row.append(strayMarker.matcher(contextLine).replaceAll("")).append("<br/>\n");
      }
      row.append("</td></tr>");
    } catch (Exception e) {
      // TODO: Need to do something sensible here with regard to highlighting
      logger.error("Highlightling failure", e);
    }
    return row.toString();
  }