コード例 #1
0
  /**
   * Runs a regex search over this document's text and turns every accepted hit into
   * highlight patterns.
   *
   * <p>The document text is loaded through {@code util}, with every whitespace run collapsed
   * to a single space. Each hit is widened by the lengths of the strings returned by
   * {@code getNegatedPrecedingCharacters} / {@code getNegatedFollowingChars}, clamped to the
   * text bounds, and passed through the two {@code anchorAssertion*} helpers. A hit is dropped
   * when its widened context begins or ends with the corresponding negated string. Surviving
   * contexts are generalised, split on "digits followed by a dot", and each piece is compiled
   * as its own pattern.
   *
   * <p>Side effects: overwrites then appends to the {@code tempRegex} field, and adds every
   * generated fragment regex to {@code highlightWords}.
   *
   * @param regex the user's regex; text marks are interpolated and diacritics substituted
   *     before it is compiled
   * @return one compiled pattern per fragment of every accepted hit (possibly empty)
   * @throws java.util.regex.PatternSyntaxException if the prepared regex, or a fragment derived
   *     from the matched text, is not a valid pattern
   */
  Pattern[] matchRegexToDocument(String regex) {

    // The same flag set is used for the search itself and for every generated pattern.
    final int matchFlags =
        Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.UNIX_LINES | Pattern.DOTALL;

    String interpolatedRegex = interpolateTextMarksIntoRegex(regex);
    ArrayList<Pattern> collected = new ArrayList<Pattern>();
    String documentText = util.loadTextFromId(url.toExternalForm()).replaceAll("\\s+", " ");
    tempRegex = documentText;

    String forbiddenPrefix = getNegatedPrecedingCharacters(interpolatedRegex);
    String forbiddenSuffix = getNegatedFollowingChars(interpolatedRegex);
    int prefixLength = forbiddenPrefix.length();
    int suffixLength = forbiddenSuffix.length();

    String searchRegex = util.substituteDiacritics(interpolatedRegex);
    tempRegex += "<br><br>" + searchRegex;

    Matcher hits = Pattern.compile(searchRegex, matchFlags).matcher(documentText);
    while (hits.find()) {
      // Widen the hit so the surrounding context can be compared with the negated strings,
      // keeping the window inside the text.
      int windowStart = Math.max(0, hits.start() - prefixLength);
      int windowEnd = Math.min(documentText.length(), hits.end() + suffixLength);

      String context = documentText.substring(windowStart, windowEnd);
      context = anchorAssertionAtStart(context, prefixLength);
      context = anchorAssertionAtEnd(context, suffixLength);

      // NOTE(review): this compares forbiddenPrefix (prefixLength chars) with a substring of
      // prefixLength + 1 chars; unless anchorAssertionAtStart alters the context so that this
      // lines up, the two can never be equal -- confirm the intended bound.
      if (prefixLength != 0
          && forbiddenPrefix.equals(context.substring(0, prefixLength + 1))) {
        continue;
      }
      if (suffixLength != 0
          && forbiddenSuffix.equals(context.substring(context.length() - suffixLength))) {
        continue;
      }

      // Brackets, braces, parentheses and dots become the regex wildcard "."; runs of two or
      // more whitespace characters become a literal \b.
      String generalised =
          context.replaceAll("([()\\[\\]{}\\.])", ".").replaceAll("\\s{2,}", "\\\\b");

      // NOTE(review): split can yield an empty leading piece, which compiles to a pattern
      // matching the empty string -- verify that downstream highlighting tolerates this.
      for (String fragment : generalised.split("[\\d]+\\.")) {
        // Every remaining whitespace character is loosened to \s+.
        String fragmentRegex = fragment.trim().replaceAll("\\s", "\\\\s+");
        highlightWords.add(fragmentRegex);
        collected.add(Pattern.compile(fragmentRegex, matchFlags));
      }
    }

    return collected.toArray(new Pattern[collected.size()]);
  }
コード例 #2
0
  /**
   * Builds the patterns used for KWIC highlighting from the given search clauses.
   *
   * <p>Each clause is handled independently:
   *
   * <ul>
   *   <li>clauses whose transformed string is {@code null} or empty are skipped;
   *   <li>clauses carrying {@code ClauseRole.REGEX} are trimmed and matched against the
   *       document via {@code matchRegexToDocument};
   *   <li>proximity clauses have every "digits + w" token removed and are then treated as
   *       phrases;
   *   <li>substring clauses use {@code util.getSubstringHighlightPatterns};
   *   <li>everything else uses {@code util.getPhraseHighlightPatterns}.
   * </ul>
   *
   * <p>This is best-effort: a clause that fails contributes no patterns, the failure is logged,
   * and processing continues with the next clause. The {@code tempRegex} field is updated along
   * the way.
   *
   * @param searchClauses the clauses of the current search; must not be {@code null}
   * @return the highlight patterns for all clauses that could be processed (possibly empty)
   */
  final Pattern[] buildHighlightTerms(ArrayList<SearchClause> searchClauses) {

    ArrayList<Pattern> hilites = new ArrayList<Pattern>();
    for (SearchClause searchClause : searchClauses) {

      try {
        String transformedString = searchClause.buildTransformedString();
        if (transformedString == null || transformedString.isEmpty()) {
          continue;
        }
        if (searchClause.getAllClauseRoles().contains(ClauseRole.REGEX)) {
          String trimmedRegex = trimRegex(transformedString);
          Pattern[] regexPatterns = matchRegexToDocument(trimmedRegex);
          hilites.addAll(Arrays.asList(regexPatterns));

        } else if (searchClause.parseForSearchType() == StringSearchFacet.SearchType.PROXIMITY) {
          // Remove the "<n>w" tokens before treating the remainder as a phrase.
          transformedString = transformedString.replaceAll("(\\d+)w", "");
          tempRegex = "Prox";
          hilites.addAll(Arrays.asList(util.getPhraseHighlightPatterns(transformedString)));

        } else if (searchClause.parseForSearchType() == StringSearchFacet.SearchType.SUBSTRING) {
          Pattern[] patterns = util.getSubstringHighlightPatterns(transformedString);
          for (Pattern pattern : patterns) {
            tempRegex += "###  " + pattern.toString() + " ###";
          }
          hilites.addAll(Arrays.asList(patterns));

        } else {
          tempRegex = "Other";
          Pattern[] patterns = util.getPhraseHighlightPatterns(transformedString);
          hilites.addAll(Arrays.asList(patterns));
        }

      } catch (Exception e) {
        // Deliberately best-effort: one bad clause (for example a regex that cannot be compiled)
        // must not prevent highlighting for the others, but the failure should be visible.
        logger.error("Failed to build highlight patterns for search clause", e);
      }
    }
    return hilites.toArray(new Pattern[hilites.size()]);
  }
コード例 #3
0
  /**
   * Renders this record's KWIC snippets as a single HTML table row.
   *
   * <p>The document text is loaded through {@code util} and highlighted with
   * {@code highlightTerms}; each resulting snippet is written into one {@code <td>} followed by
   * a {@code <br/>} and a newline.
   *
   * @return the row markup; if highlighting fails the error is logged and whatever markup had
   *     been accumulated up to that point (possibly the empty string) is returned
   */
  String getKWIC() {

    StringBuilder row = new StringBuilder();
    // TODO: why is the "ⓐ" character sneaking through when the user does a regex
    // word-boundary (\b) search? Until that is understood it is stripped from every snippet,
    // together with any whitespace around it.
    Pattern strayMarker = Pattern.compile("\\s*ⓐ\\s*");
    try {
      String documentText = util.loadTextFromId(url.toExternalForm());
      List<String> snippets = util.highlightMatches(documentText, highlightTerms);

      row.append("<tr class=\"result-text\"><td class=\"kwic\" colspan=\"7\">");
      for (String snippet : snippets) {
        row.append(strayMarker.matcher(snippet).replaceAll("")).append("<br/>\n");
      }
      row.append("</td></tr>");
    } catch (Exception e) {
      // TODO: Need to do something sensible here with regard to highlighting
      logger.error("Highlightling failure", e);
    }
    return row.toString();
  }