Esempio n. 1
0
  /**
   * Decides, from the examine/ignore and save/refuse lists, whether {@code s} should be
   * examined and/or saved, and reports each decision when the matching print flag is set.
   *
   * <p>Both decisions are delegated to {@code Utils.blurf} with a {@code false} final argument
   * (presumably the result used when neither list settles the question; confirm against
   * {@code Utils.blurf}).
   *
   * @param s the string to classify; a {@code null} value short-circuits the method
   * @param which label inserted into the log messages ahead of {@code s}
   * @param examine first list handed to {@code Utils.blurf} for the examine decision
   * @param ignore second list handed to {@code Utils.blurf} for the examine decision
   * @param save first list handed to {@code Utils.blurf} for the save decision
   * @param refuse second list handed to {@code Utils.blurf} for the save decision
   * @return {@code null} when {@code s} is {@code null}; otherwise a two-element array with the
   *     examine decision at index 0 and the save decision at index 1
   */
  private boolean[] EISR(
      String s, String which, String examine[], String ignore[], String save[], String refuse[]) {
    if (s == null) {
      return null;
    }

    // Dump every input at debug level, in parameter order.
    logger.fine(s);
    logger.fine(which);
    for (String[] patterns : new String[][] {examine, ignore, save, refuse}) {
      logger.fine(java.util.Arrays.toString(patterns));
    }

    // Evaluate both decisions before reporting either one.
    final boolean shouldExamine = Utils.blurf(examine, ignore, s, false);
    final boolean shouldSave = Utils.blurf(save, refuse, s, false);

    final String subject = which + ": " + s;

    if (shouldExamine) {
      if (args.PrintExamine) {
        logger.info("Examining " + subject);
      }
    } else if (args.PrintIgnore) {
      logger.info("Ignoring " + subject);
    }

    if (shouldSave) {
      if (args.PrintSave) {
        logger.info("Saving " + subject);
      }
    } else if (args.PrintRefuse) {
      logger.info("Refusing " + subject);
    }

    return new boolean[] {shouldExamine, shouldSave};
  }
Esempio n. 2
0
  /**
   * Scans each input string for interesting references and registers every accepted URL that
   * is not already known.
   *
   * <p>For each string: the {@code args.URLFixUp} replacements are applied, a replacement base
   * is adopted if {@code newBase} reports one, and each non-null candidate returned by
   * {@code interesting} is resolved against the current base with {@code makeURL}. A resolved
   * URL is kept only if {@code Utils.blurf} accepts it against {@code args.PathAccept} /
   * {@code args.PathReject}; it is then rewritten with {@code args.URLRewrite} (when set) and
   * passed to {@code addOne} unless {@code urls} already has an entry for it.
   *
   * @param baseURL base used to resolve candidates; replaced for the rest of the call whenever
   *     {@code newBase} returns a non-null value for one of the strings
   * @param strings the strings to scan
   */
  private void addToURLs(String baseURL, String strings[]) {
    logger.entering(baseURL);
    logger.entering(strings);

    for (String raw : strings) {
      String next = replaceAll(raw, args.URLFixUp);

      String replacementBase = newBase(next);
      if (replacementBase != null) {
        // Report the base being switched to (the previous code logged the old one).
        logger.fine("Setting base to " + replacementBase);
        baseURL = replacementBase;
      }

      String possible[] = interesting(next);

      // Indexed by args.Interesting: interesting() is presumed to return one slot per
      // configured pattern -- confirm against its definition.
      for (int j = 0; j < args.Interesting.length; j++) {
        if (possible[j] == null) {
          continue;
        }

        URL u = makeURL(baseURL, possible[j]);
        if (u == null) {
          continue;
        }

        String total = u.toString();

        boolean accept = Utils.blurf(args.PathAccept, args.PathReject, total, true);

        if (args.PrintAccept && accept) logger.info("Accepting path: " + total);
        if (args.PrintReject && !accept) logger.info("Rejecting path: " + total);

        if (!accept) {
          continue;
        }

        if (args.URLRewrite != null) {
          total = REplican.replaceAll(total, args.URLRewrite);
        }

        // Only add URLs we have not recorded yet.
        if (urls.get(total) == null) {
          if (args.PrintAdd) logger.info("Adding: " + total);
          addOne(total);
        }
      }
    }
  }
Esempio n. 3
0
  /**
   * Fills in defaults for every option left unset and widens the accept list so that nothing
   * we intend to examine or save gets rejected.
   *
   * <ul>
   *   <li>{@code PrintAll} switches on all six individual print flags.
   *   <li>{@code Interesting} defaults to patterns capturing the value of {@code href=} and
   *       {@code src=} attributes (case-insensitive, optional quotes).
   *   <li>{@code URLFixUp} defaults to a fixed replacement list (see the comment in the body).
   *   <li>With no examine/ignore option given, only {@code text/.*} MIME types are examined.
   *   <li>With no save/refuse option given, only the URLs in {@code args.additional} are saved;
   *       if there are none, an error is logged and the JVM exits with status 1.
   *   <li>{@code PathAccept} is extended with {@code PathExamine}, {@code PathSave} and
   *       {@code args.additional} via {@code Utils.combineArrays}.
   * </ul>
   */
  private void setDefaults() {
    // Expand PrintAll before any individual flag is consulted; otherwise running with only
    // PrintAll would silently skip the default announcements further down.
    if (args.PrintAll) {
      args.PrintAccept = true;
      args.PrintReject = true;
      args.PrintSave = true;
      args.PrintRefuse = true;
      args.PrintExamine = true;
      args.PrintIgnore = true;
    }

    if (args.Interesting == null) {
      String urlref = "\\s*=\\s*[\"']?([^\"'>]*)";
      String href = "[hH][rR][eE][fF]";
      String src = "[sS][rR][cC]";

      args.Interesting = new String[] {href + urlref, src + urlref};
    }

    if (args.URLFixUp == null) {
      // Presumably pattern/replacement pairs: collapse whitespace runs to one space, drop
      // backslashes, and percent-encode single quotes. The original author no longer
      // remembered why whitespace is collapsed and backslashes removed; the quote rule was
      // added because quotes confuse URLs.
      args.URLFixUp = new String[] {"\\s+", " ", "\\\\", "", "\'", "%27"};
    }

    // If they don't specify anything, look at only text.
    boolean noExamineRules =
        args.MIMEExamine == null
            && args.MIMEIgnore == null
            && args.PathExamine == null
            && args.PathIgnore == null;
    if (noExamineRules) {
      args.MIMEExamine = new String[] {"text/.*"};
      if (args.PrintExamine) {
        logger.warning("--MIMEExamine=" + java.util.Arrays.toString(args.MIMEExamine));
      }
    }

    // If they don't specify anything, save only what is specified on the command line.
    boolean noSaveRules =
        args.MIMESave == null
            && args.MIMERefuse == null
            && args.PathSave == null
            && args.PathRefuse == null;
    if (noSaveRules) {
      if (args.additional.length == 0) {
        logger.severe("No URLs specified");
        System.exit(1);
      }

      args.PathSave = new String[args.additional.length];
      for (int i = 0; i < args.additional.length; i++) {
        args.PathSave[i] = escapeURL(args.additional[i]);
      }

      if (args.PrintSave) {
        logger.warning("--PathSave=" + java.util.Arrays.toString(args.PathSave));
      }
    }

    // Make sure we accept everything we examine, everything we save, and the initial URLs.
    args.PathAccept = Utils.combineArrays(args.PathAccept, args.PathExamine);
    args.PathAccept = Utils.combineArrays(args.PathAccept, args.PathSave);
    args.PathAccept = Utils.combineArrays(args.PathAccept, args.additional);
  }