 /**
  * Grabs the document for {@code this.url}, runs it through {@code DocumentParser.parse}, and
  * writes the resulting string to {@code this.outFile}.
  *
  * @throws BuildException wrapping any exception raised by the grab, parse, or write steps
  */
 public void execute() throws BuildException {
   try {
     log("extracting " + this.url + " into " + this.outFile);
     // Obtain the raw content for the URL as a single string.
     String data = new HttpDocumentGrabber().grab(this.url);
     DocumentParser parser = new DocumentParser();
     // The second argument (the title) is derived from the URL itself via UrlUtils.formatTitle.
     String xhtml = parser.parse(this.url, UrlUtils.formatTitle(this.url), data);
     FileUtils.writeStringToFile(this.outFile, xhtml);
   } catch (Exception ex) {
     // Every failure above is rethrown wrapped in a BuildException.
     throw new BuildException(ex);
  /**
   * Searches the tree for the terms in searchKey and retrieves the most relevant snippet.
   * @param searchKey the search string typed by the user.
   * @return the most relevant snippet as a String, or null if no snippet was selected.
   */
  public String search(String searchKey) {
    // Normalize the query: lower-case it, then strip leading and trailing whitespace.
    searchKey = searchKey.toLowerCase();
    searchKey = searchKey.trim();

    // searchTerms is not declared in this method; it is assigned here and later handed to
    // relevanceEngine.
    // NOTE(review): "\\W" matches a single non-word character, so consecutive separators
    // (e.g. ", ") yield empty-string terms, and an empty searchKey yields one empty term — confirm
    // that trieTree.getWordIndexes tolerates "".
    searchTerms = searchKey.split("\\W"); // Split around all non-word characters.

    // termIndex accumulates the index lists returned for every term; temp holds one term's result.
    ArrayList<Integer> termIndex = new ArrayList<Integer>();
    ArrayList<Integer> temp;
    Snippet mostRelevantSnippet = null;

    // The array is empty only when the key consisted solely of separator characters.
    if (searchTerms.length != 0) {
      // Retrieve all indexes of search terms from trieTree
      for (int i = 0; i < searchTerms.length; i++) {
        temp = trieTree.getWordIndexes(searchTerms[i]);
        // A null result is skipped rather than added.
        if (temp != null) termIndex.addAll(temp);

      ArrayList<Snippet> allSnippets = new ArrayList<Snippet>();
      // Now extract snippets around each term search result.
      // NOTE(review): no loop body is visible here, so nothing in this excerpt adds to allSnippets
      // — confirm against the full file.
      for (int i = 0; i < termIndex.size(); i++) {

      // Debug output: prints every collected snippet to standard out.
      for (int i = 0; i < allSnippets.size(); i++)
        System.out.println("Snippet " + i + " : " + allSnippets.get(i));

      // Score each snippet and extract most relevant one.
      mostRelevantSnippet = relevanceEngine.getMostRelevant(allSnippets, searchTerms);

    // Still null when there were no search terms or when relevanceEngine returned null.
    if (mostRelevantSnippet == null) return null;

    return mostRelevantSnippet.toString();
 /**
  * Inserts each word in the document into the trie tree. This ensures that the document is
  * scanned only once and that all keyword searches can be done in near-logarithmic time.
  */
 private void initializeTree() {
   // Iterate over every Word supplied by docParser.
   Iterator<Word> iter = docParser.getAllWords();
   Word temp = null;
   while (iter.hasNext()) {
     temp = iter.next();
     // Null entries are skipped; every other word is stored in trieTree with its start index.
     if (temp != null) trieTree.putWord(temp.getWord(), temp.getStartIndex());