/** * Scans a given file for an exact match to the word target. We have chosen to use equals and * contains so that sites including the target in the URL get included. * * @param url The link to scan * @param target The word to search for * @return Whether or not this file contains the target word. */ public boolean scan(final URL url, final String target) { boolean foundTarget = false; In in = new In(url); String[] strings; try { strings = in.readAllStrings(); } catch (NullPointerException e) { return false; } for (String word : strings) { // If the word can be identified as a link if (word.startsWith(LINK_IDENTIFIER + "http")) { /* Prepares the link for construction into a URL. First it replaces the link identifier, typically a string like href=", however this is only valid for the beginning. We later use a split to remove the " following the actual URL, as well as possibly a tag closure like >. The reason for implementing this with split, is that we want to preserve the word immediately following the tag closure if applicable. This is because this may be a word matching target, so we later replace word with the result of that split where applicable. */ String[] splits = word.replace(LINK_IDENTIFIER, "").split("\"(>|)"); if (splits[0] != null) { splits[0] = splits[0].replace("&", "&"); try { URL newLink = new URL(splits[0]); // Don't enqueue the link if we've already discovered it. if (!discovered.contains(newLink) && discovered.size() < max) { queue.enqueue(newLink); discovered.add(newLink); } } catch (MalformedURLException ignored) { // We have already turned the string in question into a valid link. It should not be // identified // as a link if it does not conform to the above statements. It could still be saved // with more // complex parsing, but we don't wish to expend resources parsing javascripts or by // analysing // patterns to determine what to fix. 
} } if (splits.length > 1 && splits[1] != null) { word = splits[1]; } } // Make sure we check for lower case values when matching words. if (word.toLowerCase().equals(target) || word.toLowerCase().contains(target)) { foundTarget = true; } } return foundTarget; }
/**
 * Reads all strings from the file named in {@code args[0]}, sorts them with three-way
 * quicksort, reports the elapsed wall-clock time, and prints the sorted result.
 *
 * @param args {@code args[0]} = Input-file
 */
public static void main(String[] args) {
  final In input = new In(args[0]);
  final String[] words = input.readAllStrings();
  System.out.println("Quick 3 way sort:");
  final Stopwatch stopwatch = new Stopwatch();
  Quick3Way.sort(words);
  final double seconds = stopwatch.elapsedTime();
  System.out.println("Time:" + seconds);
  show(words);
}
/**
 * A test client. Reads the number of sites followed by pairs of sites from the input file,
 * unions each pair that is not already connected, echoes the unioned pairs, and finally
 * prints the number of remaining components.
 *
 * @param args {@code args[0]} = Input-file
 */
public static void main(String[] args) {
  final In input = new In(args[0]);
  final int siteCount = input.readInt();
  final UnionFindWQU uf = new UnionFindWQU(siteCount);
  while (!input.isEmpty()) {
    final int p = input.readInt();
    final int q = input.readInt();
    // Skip pairs that are already in the same component; union and echo the rest.
    if (!uf.connected(p, q)) {
      uf.union(p, q);
      System.out.println(p + " " + q);
    }
  }
  System.out.println(uf.componentCount() + " components");
}
/**
 * Builds a WordNet from the given synset and hyponym files. The synset file is read first,
 * mapping each synset id to its synset string and collecting every distinct noun; the hyponym
 * file is then read to build the digraph of synset relations.
 *
 * <p>NOTE(review): the digraph is sized by {@code map.size()}, which assumes synset ids are
 * contiguous and start at 0 — confirm against the input file format.
 *
 * @param synsetFileName name of the comma-separated synset file ({@code id,nouns,...})
 * @param hyponymFileName name of the comma-separated hyponym file ({@code id,hyponymId,...})
 */
public WordNet(String synsetFileName, String hyponymFileName) {
  In synsetFile = new In(synsetFileName);
  In hyponymFile = new In(hyponymFileName);
  // First pass: id -> synset string, plus the set of distinct nouns.
  while (!synsetFile.isEmpty()) {
    String[] fields = synsetFile.readLine().split(",");
    map.put(Integer.parseInt(fields[0]), fields[1]);
    for (String noun : fields[1].split(" ")) {
      // Only record each noun once.
      if (!nouns.contains(noun)) {
        nouns.add(noun);
      }
    }
  }
  // Second pass: wire up the hypernym edges for each synset id.
  synsets = new Digraph(map.size());
  while (!hyponymFile.isEmpty()) {
    String[] fields = hyponymFile.readLine().split(",");
    int source = Integer.parseInt(fields[0]);
    for (int i = 1; i < fields.length; i++) {
      synsets.addEdge(source, Integer.parseInt(fields[i]));
    }
  }
}