/**
   * Extracts the links that point to blogs from an HTML document.
   *
   * <p>Parses the document read from {@code in}, walks every anchor tag and hands each link URL
   * to {@code blogDetector.identifyURL}. A URL is collected when the detector returns anything
   * other than {@code Constants.NOT_A_BLOG}. Every inspected link is reported on standard
   * output, prefixed with {@code myNumber}.
   *
   * <p>This method does not itself close {@code in}.
   *
   * @param in stream holding the HTML document to scan
   * @return the detected blog URLs without duplicates, in natural {@code String} order; an empty
   *     array when no blog link was found
   * @throws BlogCrawlingException if parsing or reading the document fails; the original
   *     exception is passed to the {@code BlogCrawlingException} constructor
   */
  private String[] processBlog(InputStream in) throws BlogCrawlingException {

    // A sorted set drops duplicate URLs and gives the result a deterministic order.
    Set<String> linksToBlogs = new TreeSet<String>();

    try {

      // NOTE(review): the null charset presumably lets Page fall back to its default
      // encoding - confirm against the HTML Parser documentation.
      Page page = new Page(in, null);
      Parser parser = new Parser(new Lexer(page));

      // Restrict the parse result to anchor tags.
      TagNameFilter anchorTagsFilter = new TagNameFilter("a");
      NodeList anchorTagsList = parser.parse(anchorTagsFilter);

      for (int i = 0; i < anchorTagsList.size(); i++) {
        // NOTE(review): assumes every node accepted by the "a" filter is a LinkTag - confirm
        // against the node factory configured for this parser.
        LinkTag tag = (LinkTag) anchorTagsList.elementAt(i);
        String linkURL = tag.getLink();

        if (blogDetector.identifyURL(linkURL, null) != Constants.NOT_A_BLOG) {
          System.out.println("[" + myNumber + "] *BLOG Detected* ==> " + linkURL);
          linksToBlogs.add(linkURL);
        } else {
          System.out.println("[" + myNumber + "] *Non-BLOG Detected* ==> " + linkURL);
        }
      }

      // toArray walks the set in iteration (sorted) order, same as a manual copy loop.
      return linksToBlogs.toArray(new String[linksToBlogs.size()]);

    } catch (ParserException e) {
      e.printStackTrace();
      throw new BlogCrawlingException(e);
    } catch (IOException e) {
      // UnsupportedEncodingException is an IOException, so this handler covers both.
      e.printStackTrace();
      throw new BlogCrawlingException(e);
    }
  }