Exemplo n.º 1
0
  /**
   * Builds a copy of the given HTML in which only script content survives.
   *
   * <p>The input is scanned one character at a time:
   *
   * <ul>
   *   <li>line feed, carriage return, tab and space are always copied unchanged and are never
   *       passed to the region matcher;
   *   <li>any other character is copied when the matcher is currently waiting for an end-script
   *       region (i.e. the character is inside script content), otherwise it is written as a
   *       single space;
   *   <li>when an end-script region (ex : {@code </script>}) is matched, the part of it already
   *       written to the output is overwritten with the region's spaces and the matcher is reset.
   * </ul>
   *
   * @param html the HTML content which contains JS content; must not be {@code null}.
   * @param tagRegions the HTML tags whose content is to be treated as JS.
   * @return the HTML content with JS content kept and the other non-whitespace characters replaced
   *     with spaces.
   */
  public static String extractJS(String html, ScriptTagRegion... tagRegions) {
    IState state = createState(tagRegions);
    StringBuilder out = new StringBuilder();
    char[] input = html.toCharArray();

    for (int pos = 0; pos < input.length; pos++) {
      char current = input[pos];

      // Whitespace is kept verbatim and does not take part in region matching.
      if (current == '\n' || current == '\r' || current == '\t' || current == ' ') {
        out.append(current);
        continue;
      }

      // Feed the character to the matcher; a non-null result means a region just completed.
      Region hit = state.update(current);
      if (hit == null) {
        // No region completed: keep the character only while we are waiting for the end of a
        // script element, i.e. while we are inside JS content.
        boolean insideScript = state.isNextRegionToFindType(RegionType.END_SCRIPT);
        out.append(insideScript ? current : ' ');
        continue;
      }

      if (hit.getType().equals(RegionType.END_SCRIPT)) {
        // The leading characters of the end-script region were copied as if they were JS;
        // overwrite them with the region's spaces, then start looking for the next script.
        // NOTE(review): the offsets rely on Region.getLength()/getSpaces(), which are not
        // visible from here - confirm they agree with what has been written so far.
        int regionLength = hit.getLength();
        out.replace(pos - regionLength, pos, hit.getSpaces());
        state.reset();
      }

      // The character that completed the region is itself markup: blank it out.
      out.append(' ');
    }

    return out.toString();
  }