Beispiel #1
0
  /**
   * Rewrites the URLs that have to be correct as soon as the page is loaded.
   *
   * <p>The base URL used for resolution is the document's existing BASE HREF when {@code
   * TagMagix.getBaseHref} reports one. Otherwise the capture's original URL is used, and a {@code
   * <base>} tag carrying that URL is added through {@code insertAtStartOfHead}.
   *
   * <p>With that base URL and the capture timestamp, the following are handed to {@code TagMagix}
   * for markup: FRAME-SRC, META-URL, LINK-HREF and SCRIPT-SRC tag-attribute pairs, the {@code
   * background} attribute on any tag, CSS imports, and style URLs.
   */
  public void resolvePageUrls() {
    // TODO: get url from Resource instead of SearchResult?
    String baseUrl = result.getOriginalUrl();
    final String timestamp = result.getCaptureTimestamp();

    final String declaredBase = TagMagix.getBaseHref(sb);
    if (declaredBase != null) {
      // The document already names its base; resolve everything against it.
      baseUrl = declaredBase;
    } else {
      // No base declared: pin one to the capture's original URL.
      insertAtStartOfHead("<base href=\"" + baseUrl + "\" />");
    }

    // Each row is {tag name, attribute name}.
    final String[][] tagAttributePairs = {
      {"FRAME", "SRC"},
      {"META", "URL"},
      {"LINK", "HREF"},
      {"SCRIPT", "SRC"},
      {TagMagix.ANY_TAGNAME, "background"}
    };

    // TODO: The classic WM added a js_ to the datespec, so that NotInArchives
    // can return a valid JavaScript document and not cause JavaScript errors.
    for (int i = 0; i < tagAttributePairs.length; i++) {
      final String tagName = tagAttributePairs[i][0];
      final String attrName = tagAttributePairs[i][1];
      TagMagix.markupTagREURIC(sb, uriConverter, timestamp, baseUrl, tagName, attrName);
    }

    TagMagix.markupCSSImports(sb, uriConverter, timestamp, baseUrl);
    TagMagix.markupStyleUrls(sb, uriConverter, timestamp, baseUrl);
  }
Beispiel #2
0
  /**
   * Rewrites every URL-bearing tag attribute listed below, plus CSS imports and style URLs, so
   * they resolve correctly to absolute URLs within the Wayback service.
   *
   * <p>URLs are resolved against the document's existing BASE HREF when {@code
   * TagMagix.getBaseHref} reports one, and against the capture's original URL otherwise. Unlike
   * {@code resolvePageUrls}-style processing, no {@code <base>} tag is inserted here.
   */
  public void resolveAllPageUrls() {
    // TODO: get url from Resource instead of SearchResult?
    final String originalUrl = result.getOriginalUrl();
    final String timestamp = result.getCaptureTimestamp();

    final String declaredBase = TagMagix.getBaseHref(sb);
    final String baseUrl = (declaredBase == null) ? originalUrl : declaredBase;

    // Tag attributes go through a wrapper around the configured converter.
    final ResultURIConverter wrappedConverter = new SpecialResultURIConverter(uriConverter);

    // TODO: forms...?
    // Each row is {tag name, attribute name}.
    // NOTE(review): "CDATA" is not a standard OBJECT attribute; "DATA" may have been
    // intended -- confirm before changing, since it alters which URLs get rewritten.
    final String[][] tagAttributePairs = {
      {"FRAME", "SRC"},
      {"META", "URL"},
      {"LINK", "HREF"},
      {"SCRIPT", "SRC"},
      {"IMG", "SRC"},
      {"A", "HREF"},
      {"AREA", "HREF"},
      {"OBJECT", "CODEBASE"},
      {"OBJECT", "CDATA"},
      {"APPLET", "CODEBASE"},
      {"APPLET", "ARCHIVE"},
      {"EMBED", "SRC"},
      {"IFRAME", "SRC"},
      {TagMagix.ANY_TAGNAME, "background"}
    };
    for (int i = 0; i < tagAttributePairs.length; i++) {
      final String tagName = tagAttributePairs[i][0];
      final String attrName = tagAttributePairs[i][1];
      TagMagix.markupTagREURIC(sb, wrappedConverter, timestamp, baseUrl, tagName, attrName);
    }

    // NOTE(review): the CSS passes use the unwrapped uriConverter, not the wrapper
    // above -- confirm this asymmetry is intentional.
    TagMagix.markupCSSImports(sb, uriConverter, timestamp, baseUrl);
    TagMagix.markupStyleUrls(sb, uriConverter, timestamp, baseUrl);
  }
Beispiel #3
0
 /**
  * Inserts text into the document buffer at the offset {@code TagMagix.getEndOfFirstTag} reports
  * for the first {@code body} tag. When that lookup returns -1, the text is inserted at offset 0
  * instead.
  *
  * @param toInsert text to splice into the document buffer
  */
 public void insertAtStartOfBody(String toInsert) {
   final int endOfBodyTag = TagMagix.getEndOfFirstTag(sb, "body");
   // -1 signals "not found"; fall back to the very start of the buffer.
   final int offset = (endOfBodyTag == -1) ? 0 : endOfBodyTag;
   sb.insert(offset, toInsert);
 }
Beispiel #4
0
  /**
   * Marks up the HREF attribute of REF tags in the document.
   *
   * <p>URLs are resolved against the capture's original URL, using the capture timestamp and a
   * {@code MMSToHTTPResultURIConverter} that wraps the configured {@code uriConverter}.
   */
  public void resolveASXRefUrls() {
    // TODO: get url from Resource instead of SearchResult?
    final String baseUrl = result.getOriginalUrl();
    final String timestamp = result.getCaptureTimestamp();

    TagMagix.markupTagREURIC(
        sb, new MMSToHTTPResultURIConverter(uriConverter), timestamp, baseUrl, "REF", "HREF");
  }
Beispiel #5
0
  /**
   * Ensures the document declares a base URL.
   *
   * <p>When {@code TagMagix.getBaseHref} finds no BASE HREF in the document, a {@code <base>} tag
   * pointing at the capture's original URL is added through {@code insertAtStartOfHead}. A
   * document that already declares a base is left untouched.
   */
  public void addBase() {
    // TODO: get url from Resource instead of SearchResult?
    String pageUrl = result.getOriginalUrl();

    // Only add a base when the document does not already declare one.
    if (TagMagix.getBaseHref(sb) == null) {
      insertAtStartOfHead("<base href=\"" + pageUrl + "\" />");
    }
  }
Beispiel #6
0
 /**
  * Marks up the CSS imports in the document via {@code TagMagix.markupCSSImports}, resolving them
  * against the capture's original URL with the capture timestamp.
  */
 public void resolveCSSUrls() {
   // TODO: get url from Resource instead of SearchResult?
   final String baseUrl = result.getOriginalUrl();
   final String timestamp = result.getCaptureTimestamp();

   TagMagix.markupCSSImports(sb, uriConverter, timestamp, baseUrl);
 }