/** * Scans a given file for an exact match to the word target. We have chosen to use equals and * contains so that sites including the target in the URL get included. * * @param url The link to scan * @param target The word to search for * @return Whether or not this file contains the target word. */ public boolean scan(final URL url, final String target) { boolean foundTarget = false; In in = new In(url); String[] strings; try { strings = in.readAllStrings(); } catch (NullPointerException e) { return false; } for (String word : strings) { // If the word can be identified as a link if (word.startsWith(LINK_IDENTIFIER + "http")) { /* Prepares the link for construction into a URL. First it replaces the link identifier, typically a string like href=", however this is only valid for the beginning. We later use a split to remove the " following the actual URL, as well as possibly a tag closure like >. The reason for implementing this with split, is that we want to preserve the word immediately following the tag closure if applicable. This is because this may be a word matching target, so we later replace word with the result of that split where applicable. */ String[] splits = word.replace(LINK_IDENTIFIER, "").split("\"(>|)"); if (splits[0] != null) { splits[0] = splits[0].replace("&", "&"); try { URL newLink = new URL(splits[0]); // Don't enqueue the link if we've already discovered it. if (!discovered.contains(newLink) && discovered.size() < max) { queue.enqueue(newLink); discovered.add(newLink); } } catch (MalformedURLException ignored) { // We have already turned the string in question into a valid link. It should not be // identified // as a link if it does not conform to the above statements. It could still be saved // with more // complex parsing, but we don't wish to expend resources parsing javascripts or by // analysing // patterns to determine what to fix. 
} } if (splits.length > 1 && splits[1] != null) { word = splits[1]; } } // Make sure we check for lower case values when matching words. if (word.toLowerCase().equals(target) || word.toLowerCase().contains(target)) { foundTarget = true; } } return foundTarget; }
/**
 * Demo entry point: reads all strings from the source named by the first command-line
 * argument, sorts them with {@code Quick3Way.sort}, and prints the measured time followed by
 * the sorted strings.
 *
 * @param args command-line arguments; {@code args[0]} names the input handed to {@code In}
 */
public static void main(String[] args) {
    final String[] tokens = new In(args[0]).readAllStrings();

    System.out.println("Quick 3 way sort:");

    // Only the sort itself is timed; reading the input and printing are excluded.
    final Stopwatch stopwatch = new Stopwatch();
    Quick3Way.sort(tokens);
    final double duration = stopwatch.elapsedTime();

    System.out.println("Time:" + duration);
    show(tokens);
}