/**
   * Determines the content encoding to use for a request.
   *
   * <p>The encoding that {@code ConversionUtils.getEncodingFromContentType} extracts from the
   * request's own content type is returned when it is present. Otherwise {@code urlWithoutQuery}
   * is looked up in {@code pageEncodings} and in {@code formEncodings}; a form encoding takes
   * precedence over a page encoding. Each non-null map is read while holding its own monitor, and
   * a {@code null} map is simply skipped.
   *
   * @param request {@link HttpRequestHdr} whose content type is inspected first
   * @param pageEncodings {@code Map<String, String>} of known page encodings, queried with
   *     {@code urlWithoutQuery}; may be {@code null}
   * @param formEncodings {@code Map<String, String>} of known form encodings, queried with
   *     {@code urlWithoutQuery}; may be {@code null}
   * @param urlWithoutQuery key used for the lookups in both maps
   * @return String content encoding, or {@code null} when none of the sources provides one
   */
  protected String computeContentEncoding(
      HttpRequestHdr request,
      Map<String, String> pageEncodings,
      Map<String, String> formEncodings,
      String urlWithoutQuery) {
    // An encoding announced by the request itself always wins.
    String declaredEncoding =
        ConversionUtils.getEncodingFromContentType(request.getContentType());
    if (declaredEncoding != null) {
      return declaredEncoding;
    }

    // Fall back to what is already known about the page ...
    String pageEncoding = null;
    if (pageEncodings != null) {
      synchronized (pageEncodings) {
        pageEncoding = pageEncodings.get(urlWithoutQuery);
      }
    }

    // ... and about the form.
    String formEncoding = null;
    if (formEncodings != null) {
      synchronized (formEncodings) {
        formEncoding = formEncodings.get(urlWithoutQuery);
      }
    }

    // Form encoding has priority over page encoding
    return formEncoding != null ? formEncoding : pageEncoding;
  }
// Example #2
// 0
  /**
   * {@inheritDoc}
   *
   * <p>This implementation decodes {@code html} with {@code encoding} and scans the resulting
   * text with the {@code REGEXP} pattern (case-insensitive, single-line, read-only) taken from the
   * JMeter pattern cache. For every match, the first {@code NUM_BASE_GROUPS} groups are treated as
   * BASE HREF values that replace the base URL used from then on; every remaining non-null group
   * is added to {@code urls} together with the current base URL.
   *
   * @throws HTMLParseException if {@code encoding} is not supported or the pattern cannot be
   *     obtained from the cache
   */
  @Override
  public Iterator<URL> getEmbeddedResourceURLs(
      String userAgent, byte[] html, URL baseUrl, URLCollection urls, String encoding)
      throws HTMLParseException {
    // Declared outside the try block so that the finally clause can release them.
    Perl5Matcher matcher = null;
    Pattern pattern = null;
    try {
      matcher = JMeterUtils.getMatcher();
      PatternMatcherInput input = localInput.get();
      // TODO: find a way to avoid the cost of creating a String here --
      // probably a new PatternMatcherInput working on a byte[] would do
      // better.
      String page = new String(html, encoding);
      input.setInput(page);

      int mask =
          Perl5Compiler.CASE_INSENSITIVE_MASK
              | Perl5Compiler.SINGLELINE_MASK
              | Perl5Compiler.READ_ONLY_MASK;
      pattern = JMeterUtils.getPatternCache().getPattern(REGEXP, mask);

      // The base URL may be replaced while scanning, so track it separately.
      URL currentBase = baseUrl;
      while (matcher.contains(input, pattern)) {
        MatchResult result = matcher.getMatch();
        if (log.isDebugEnabled()) {
          log.debug("match groups " + result.groups() + " " + result.toString());
        }

        // Leading groups: a BASE HREF changes the base for everything that follows.
        for (int idx = 1; idx <= NUM_BASE_GROUPS && idx <= result.groups(); idx++) {
          String baseHref = result.group(idx);
          if (baseHref == null) {
            continue;
          }
          if (log.isDebugEnabled()) {
            log.debug("new baseUrl: " + baseHref + " - " + currentBase.toString());
          }
          try {
            currentBase = ConversionUtils.makeRelativeURL(currentBase, baseHref);
          } catch (MalformedURLException malformed) {
            // Doesn't even look like a URL?
            // Maybe it isn't: keep the previous base and carry on.
            if (log.isDebugEnabled()) {
              log.debug(
                  "Can't build base URL from RL " + baseHref + " in page " + currentBase,
                  malformed);
            }
          }
        }

        // Remaining groups: each non-null capture is an embedded resource reference.
        for (int idx = NUM_BASE_GROUPS + 1; idx <= result.groups(); idx++) {
          String resource = result.group(idx);
          if (resource == null) {
            continue;
          }
          if (log.isDebugEnabled()) {
            log.debug("group " + idx + " - " + resource);
          }
          urls.addURL(resource, currentBase);
        }
      }
      return urls.iterator();
    } catch (MalformedCachePatternException badPattern) {
      throw new HTMLParseException(badPattern.getMessage(), badPattern);
    } catch (UnsupportedEncodingException badEncoding) {
      throw new HTMLParseException(badEncoding.getMessage(), badEncoding);
    } finally {
      JMeterUtils.clearMatcherMemory(matcher, pattern);
    }
  }