/*
** Decide whether the string s should be examined and/or saved.
**
** The examine decision comes from Utils.blurf(examine, ignore, s, false)
** and the save decision from Utils.blurf(save, refuse, s, false).  Each
** decision is reported through the logger when the matching Print* flag
** is set; "which" is only used to label those messages.
**
** Returns null when s is null; otherwise a two-element array holding
** { examine, save }.
*/
private boolean[] EISR(
        String s, String which, String examine[], String ignore[], String save[], String refuse[]) {
    if (s == null) {
        return null;
    }

    // Trace every input before deciding anything.
    logger.fine(s);
    logger.fine(which);
    logger.fine(java.util.Arrays.toString(examine));
    logger.fine(java.util.Arrays.toString(ignore));
    logger.fine(java.util.Arrays.toString(save));
    logger.fine(java.util.Arrays.toString(refuse));

    boolean shouldExamine = Utils.blurf(examine, ignore, s, false);
    boolean shouldSave = Utils.blurf(save, refuse, s, false);

    String label = which + ": " + s;

    // Report the examine/ignore outcome.
    if (shouldExamine) {
        if (args.PrintExamine) {
            logger.info("Examining " + label);
        }
    } else if (args.PrintIgnore) {
        logger.info("Ignoring " + label);
    }

    // Report the save/refuse outcome.
    if (shouldSave) {
        if (args.PrintSave) {
            logger.info("Saving " + label);
        }
    } else if (args.PrintRefuse) {
        logger.info("Refusing " + label);
    }

    return new boolean[] {shouldExamine, shouldSave};
}
private void addToURLs(String baseURL, String strings[]) { logger.entering(baseURL); logger.entering(strings); for (int i = 0; i < strings.length; i++) { String next = replaceAll(strings[i], args.URLFixUp); String newBase = newBase(next); if (newBase != null) { logger.fine("Setting base to " + baseURL); baseURL = newBase; } String possible[] = interesting(next); for (int j = 0; j < args.Interesting.length; j++) { if (possible[j] != null) { URL u = makeURL(baseURL, possible[j]); if (u == null) continue; String total = u.toString(); String PathAccept[] = args.PathAccept; String PathReject[] = args.PathReject; boolean accept = Utils.blurf(PathAccept, PathReject, total, true); if (args.PrintAccept && accept) logger.info("Accepting path: " + total); if (args.PrintReject && !accept) logger.info("Rejecting path: " + total); if (accept) { if (args.URLRewrite != null) total = REplican.replaceAll(total, args.URLRewrite); // if we don't already have it if (urls.get(total) == null) { if (args.PrintAdd) logger.info("Adding: " + total); addOne(total); } } } } } }
private void setDefaults() { if (args.Interesting == null) { String urlref = "\\s*=\\s*[\"']?([^\"'>]*)"; String href = "[hH][rR][eE][fF]"; String src = "[sS][rR][cC]"; String init[] = { href + urlref, src + urlref, }; args.Interesting = init; } if (args.URLFixUp == null) { // so, i don't remember why i collasped multiple spaces and // removed \'s. must have been important and i should have // documented. 's confuse URLs... // args.URLFixUp = new String[]{"\\s+", " ", "\\\\", ""}; args.URLFixUp = new String[] {"\\s+", " ", "\\\\", "", "\'", "%27"}; } // if they don't specify anything, look at only text. if (args.MIMEExamine == null && args.MIMEIgnore == null && args.PathExamine == null && args.PathIgnore == null) { args.MIMEExamine = new String[] {"text/.*"}; if (args.PrintExamine) logger.warning("--MIMEExamine=" + java.util.Arrays.toString(args.MIMEExamine)); } // if they don't specify anything, save only what is specified on // the command line. if (args.MIMESave == null && args.MIMERefuse == null && args.PathSave == null && args.PathRefuse == null) { if (args.additional.length == 0) { logger.severe("No URLs specified"); System.exit(1); } args.PathSave = new String[args.additional.length]; for (int i = 0; i < args.additional.length; i++) args.PathSave[i] = escapeURL(args.additional[i]); if (args.PrintSave) logger.warning("--PathSave=" + java.util.Arrays.toString(args.PathSave)); } if (args.PrintAll) args.PrintAccept = args.PrintReject = args.PrintSave = args.PrintRefuse = args.PrintExamine = args.PrintIgnore = true; /* ** make sure we accept everything we examine, save, and the initial ** URLs */ args.PathAccept = Utils.combineArrays(args.PathAccept, args.PathExamine); args.PathAccept = Utils.combineArrays(args.PathAccept, args.PathSave); args.PathAccept = Utils.combineArrays(args.PathAccept, args.additional); }