/**
   * Scans the content behind {@code url} for the word {@code target} and, as a side effect,
   * collects every link encountered along the way.
   *
   * <p>The content is read as an array of strings via {@code In.readAllStrings()}. A string
   * matches when its lower-cased form contains the lower-cased target, so strings that merely
   * embed the target (for example a URL that includes it) count as well.
   *
   * <p>Strings starting with {@code LINK_IDENTIFIER + "http"} are treated as links. Each link that
   * parses as a {@link URL} is enqueued and recorded in {@code discovered}, unless it is already
   * recorded there or {@code discovered} already holds {@code max} entries. All strings are
   * visited even after a match so that link collection covers the whole content.
   *
   * @param url The link to scan
   * @param target The word to search for; compared case-insensitively
   * @return Whether or not this file contains the target word. Also {@code false} when reading the
   *     content raises a {@link NullPointerException}.
   */
  public boolean scan(final URL url, final String target) {
    In in = new In(url);
    String[] strings;
    try {
      strings = in.readAllStrings();
    } catch (NullPointerException e) {
      // Best effort: content that cannot be read is reported as "target not found".
      return false;
    }

    // Lower-case the target once, with a fixed locale, so that both sides of the comparison are
    // normalised the same way. A null target is passed through untouched and still fails on the
    // first comparison below.
    final String needle = target == null ? null : target.toLowerCase(java.util.Locale.ROOT);

    boolean foundTarget = false;
    for (String word : strings) {
      String candidate = word;

      // If the word can be identified as a link
      if (word.startsWith(LINK_IDENTIFIER + "http")) {

        /*
           Prepares the link for construction into a URL. First the link identifier (typically a
           string like href=") is removed. The split then cuts at the quote that follows the URL,
           optionally together with a tag closure (>). Splitting, rather than truncating, keeps the
           text immediately after the tag closure, because that text may itself be a word matching
           the target; it replaces the candidate below when present.
        */
        String[] splits = word.replace(LINK_IDENTIFIER, "").split("\"(>|)");

        if (splits.length > 0) {
          // Inside HTML attributes '&' is written as the entity &amp;, so decode it before parsing.
          String link = splits[0].replace("&amp;", "&");
          try {
            URL newLink = new URL(link);

            // Don't enqueue the link if we've already discovered it, or if the cap is reached.
            // NOTE(review): java.net.URL equality may resolve host names; if discovered relies on
            // equals/hashCode this lookup can be slow - confirm against its declaration.
            if (!discovered.contains(newLink) && discovered.size() < max) {
              queue.enqueue(newLink);
              discovered.add(newLink);
            }
          } catch (MalformedURLException ignored) {
            // Deliberately skipped: a string that still is not a valid URL after the clean-up
            // above would need heavier parsing (scripts, pattern analysis) that is not worth the
            // cost here.
          }
        }

        if (splits.length > 1) {
          candidate = splits[1];
        }
      }

      // contains() also covers the exact-match case, so a separate equals() check is unnecessary.
      if (candidate.toLowerCase(java.util.Locale.ROOT).contains(needle)) {
        foundTarget = true;
      }
    }
    return foundTarget;
  }
Example #2
0
  /**
   * Test client: reads all strings from the input named by {@code args[0]}, sorts them with
   * {@code Quick3Way.sort}, prints the elapsed time reported by a {@code Stopwatch}, and finally
   * hands the array to {@code show}.
   *
   * @param args {@code args[0]} is the input source passed to {@code In}
   */
  public static void main(String[] args) {
    String[] items = new In(args[0]).readAllStrings();

    System.out.println("Quick 3 way sort:");

    // The stopwatch is created right before the sort so that only the sort itself is timed.
    Stopwatch stopwatch = new Stopwatch();
    Quick3Way.sort(items);
    double seconds = stopwatch.elapsedTime();

    System.out.println("Time:" + seconds);
    show(items);
  }
  /**
   * A test client. Reads an integer count followed by pairs of integers from the input, merges
   * every pair that is not connected yet (printing each such pair), and finally prints the number
   * of components.
   *
   * @param args {@code args[0]} = Input-file
   */
  public static void main(String args[]) {
    In input = new In(args[0]);

    // The first integer sizes the union-find structure; the rest of the input is read as pairs.
    int siteCount = input.readInt();
    UnionFindWQU unionFind = new UnionFindWQU(siteCount);

    while (!input.isEmpty()) {
      int first = input.readInt();
      int second = input.readInt();

      // Pairs that are already connected are skipped silently.
      if (!unionFind.connected(first, second)) {
        unionFind.union(first, second);
        System.out.println(first + " " + second);
      }
    }

    System.out.println(unionFind.componentCount() + " components");
  }
Example #4
0
  /**
   * Builds the word net from two comma-separated input files.
   *
   * <p>Each line of the synset file is split on commas: the first field is parsed as an integer id
   * and stored in {@code map} together with the second field. The second field is further split
   * on single spaces, and every resulting noun not yet present in {@code nouns} is added to it.
   *
   * <p>Afterwards {@code synsets} is created as a {@code Digraph} sized by the number of entries
   * in {@code map}. Each line of the hyponym file is split on commas, and an edge is added from
   * the first id to every following id on that line.
   *
   * @param synsetFileName name of the input holding the synset lines
   * @param hyponymFileName name of the input holding the edge lines
   */
  public WordNet(String synsetFileName, String hyponymFileName) {
    In synsetInput = new In(synsetFileName);
    In hyponymInput = new In(hyponymFileName);

    while (!synsetInput.isEmpty()) {
      String[] fields = synsetInput.readLine().split(",");
      int id = Integer.parseInt(fields[0]);
      String synset = fields[1];
      map.put(id, synset);

      for (String noun : synset.split(" ")) {
        if (!nouns.contains(noun)) {
          nouns.add(noun);
        }
      }
    }

    synsets = new Digraph(map.size());

    while (!hyponymInput.isEmpty()) {
      String[] ids = hyponymInput.readLine().split(",");

      // The source id is parsed inside the loop on purpose: a line with a single field adds no
      // edge and therefore never has its first field parsed.
      for (int k = 1; k < ids.length; k++) {
        synsets.addEdge(Integer.parseInt(ids[0]), Integer.parseInt(ids[k]));
      }
    }
  }