Пример #1
0
  /**
   * Runs the focused crawl over the {@code toVisit} queue and writes the result to disk.
   *
   * <p>Nodes are taken from the head of {@code toVisit} until the queue is empty or 1000 key links
   * have been collected. A node is fetched only when its distance is at most 5. A fetched page for
   * which {@code checkkeyphrase} returns {@code true} is added to {@code keylinks}, and, when its
   * distance is below 5, each of its {@code a[href]} links is passed to {@code filter} with the
   * distance increased by one. Every successfully fetched node is added to {@code visited}.
   *
   * <p>When the loop ends, the URL of each key link is written, one per line, to
   * {@code keylinks.txt} using UTF-8.
   *
   * <p>NOTE: there is no delay between requests; an earlier sleep-based throttle was disabled.
   *
   * @throws FileNotFoundException if {@code keylinks.txt} cannot be opened for writing
   * @throws UnsupportedEncodingException if the UTF-8 charset name is not supported
   */
  public static void focused() throws FileNotFoundException, UnsupportedEncodingException {
    // Cap on fetch attempts per node. Without a cap, a page that keeps failing (for example a
    // permanent HTTP error) would be retried forever and the crawl would never advance.
    final int maxAttempts = 3;

    while ((!toVisit.isEmpty()) && (keylinks.size() < 1000)) {
      Link node = toVisit.get(0);

      // Nodes beyond distance 5 are dropped without being fetched instead of spinning on them.
      if (node.getDistance() <= 5) {
        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
          try {
            doc = Jsoup.connect(node.getUrl()).get();
            if (checkkeyphrase(doc)) {
              keylinks.add(node);
              // Only expand outgoing links while below the depth limit.
              if (node.getDistance() < 5) {
                links = doc.select("a[href]");
                for (Element link : links) {
                  filter(new Link(link.absUrl("href"), (node.getDistance() + 1)));
                }
              }
              System.out.println(
                  "toVisit: "
                      + toVisit.size()
                      + "  Visited: "
                      + visited.size()
                      + "  Distance: "
                      + node.getDistance()
                      + " "
                      + "  keylinks: "
                      + keylinks.size()
                      + " "
                      + toVisit.get(0).getUrl());
            }
            visited.add(node);
            break;
          } catch (IOException e) {
            System.err.println(
                "Fetch failed (attempt "
                    + attempt
                    + " of "
                    + maxAttempts
                    + ") for "
                    + node.getUrl());
            e.printStackTrace();
          }
        }
      }
      toVisit.remove(0);
    }

    // try-with-resources closes the writer even if writing the report fails part-way.
    try (PrintWriter writer = new PrintWriter("keylinks.txt", "UTF-8")) {
      System.out.println("Visted: " + visited.size());
      System.out.println("toVisit: " + toVisit.size());
      System.out.println("keylinks: " + keylinks.size());
      for (Link l : keylinks) {
        writer.println(l.getUrl());
      }
    }
  }
Пример #2
0
  /**
   * Strips the fragment from the link's URL and appends the link to {@code toVisit} if it passes
   * every check below.
   *
   * <p>A link is queued only when all of the following hold, checked in this order:
   *
   * <ol>
   *   <li>its URL contains {@code https://en.wikipedia.org/wiki/} (case-insensitive);
   *   <li>its URL contains exactly one {@code ':'} character;
   *   <li>its URL does not contain {@code https://en.wikipedia.org/wiki/Main_Page}
   *       (case-insensitive);
   *   <li>its distance is at most {@code MAX_DEPTH};
   *   <li>it is contained in neither {@code toVisit} nor {@code visited}.
   * </ol>
   *
   * <p>The link's URL is rewritten without its fragment even when the link is rejected.
   *
   * @param link the candidate link; its URL is modified in place
   */
  public static void filter(Link link) {
    // Cut at the first '#'. indexOf/substring is used instead of split("#")[0] because split
    // drops trailing empty strings, so a URL made only of '#' characters yields an empty array
    // and indexing it would throw.
    String rawUrl = link.getUrl();
    int fragmentStart = rawUrl.indexOf('#');
    link.setUrl(fragmentStart >= 0 ? rawUrl.substring(0, fragmentStart) : rawUrl);

    String url = link.getUrl();
    // Locale.ROOT keeps the case-insensitive comparison independent of the JVM default locale.
    String lowerUrl = url.toLowerCase(java.util.Locale.ROOT);

    if (!lowerUrl.contains(
        "https://en.wikipedia.org/wiki/".toLowerCase(java.util.Locale.ROOT))) {
      return;
    }

    // Exactly one ':' in the whole URL: the first and last occurrence must be the same position.
    int firstColon = url.indexOf(':');
    if (firstColon < 0 || firstColon != url.lastIndexOf(':')) {
      return;
    }

    if (lowerUrl.contains(
        "https://en.wikipedia.org/wiki/Main_Page".toLowerCase(java.util.Locale.ROOT))) {
      return;
    }

    if (link.getDistance() > MAX_DEPTH) {
      return;
    }

    if ((!toVisit.contains(link)) && (!visited.contains(link))) {
      toVisit.add(link);
    }
  }
 /**
  * Rewrites the URL of {@code result} by inserting the value returned by
  * {@code getPrefix(request)} immediately before the first {@code "/"} in the URL.
  *
  * <p>If the URL contains no {@code "/"}, its text is left as it was. The URL is set back on
  * {@code result} in both cases, and the slash index and URL are logged at debug level before and
  * after the insertion step.
  *
  * @param result the link whose URL is rewritten in place
  * @param request the request passed to {@code getPrefix}
  */
 public void postProcessResult(Link result, HttpServletRequest request) {
   final StringBuilder rewritten = new StringBuilder(result.getUrl());
   final int slashAt = rewritten.indexOf("/");

   LOG.debug("postProcessResult() {} ({})", slashAt, rewritten);

   final boolean hasSlash = slashAt >= 0;
   if (hasSlash) {
     // Splice the prefix in front of the first slash.
     rewritten.insert(slashAt, getPrefix(request));
   }

   LOG.debug("postProcessResult() {} ({})", slashAt, rewritten);

   result.setUrl(rewritten.toString());
 }
Пример #4
0
  /**
   * Entry point: seeds the crawl and starts it.
   *
   * <p>Sets {@code keyphrase}, adds {@code SEED} to {@code toVisit}, then either calls
   * {@code unfocused()} (when {@code keyphrase} is {@code null}) or processes the seed itself and
   * hands over to {@code focused()}. For the seed, every {@code a[href]} link on the page is passed
   * to {@code filter} with the distance increased by one, whether or not the page passes
   * {@code checkkeyphrase}; the seed is added to {@code keylinks} only if it does.
   *
   * @param args command-line arguments; not used
   * @throws FileNotFoundException declared for the calls into the crawl methods
   * @throws UnsupportedEncodingException declared for the calls into the crawl methods
   * @throws InterruptedException declared for the calls into the crawl methods
   */
  public static void main(String[] args)
      throws FileNotFoundException, UnsupportedEncodingException, InterruptedException {

    keyphrase = "concordance";
    toVisit.add(SEED);
    // keyphrase is assigned a literal just above, so this branch is currently never taken.
    if (keyphrase == null) {
      unfocused();
    } else {
      // Cap on fetch attempts for the seed. Without a cap, a seed that keeps failing to load
      // would be retried forever and focused() would never start.
      final int maxAttempts = 3;
      Link node = toVisit.get(0);

      // A seed at distance 5 or more is not fetched; previously this case looped forever.
      if (node.getDistance() < 5) {
        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
          try {
            doc = Jsoup.connect(node.getUrl()).get();
            links = doc.select("a[href]");
            for (Element link : links) {
              filter(new Link(link.absUrl("href"), (node.getDistance() + 1)));
            }
            visited.add(node);
            if (checkkeyphrase(doc)) {
              keylinks.add(node);
            }
            System.out.println(
                "toVisit: "
                    + toVisit.size()
                    + "  Visited: "
                    + visited.size()
                    + "  Distance: "
                    + node.getDistance()
                    + " "
                    + "  keylinks: "
                    + keylinks.size()
                    + " "
                    + toVisit.get(0).getUrl());
            break;
          } catch (IOException e) {
            System.err.println(
                "Fetch failed (attempt "
                    + attempt
                    + " of "
                    + maxAttempts
                    + ") for "
                    + node.getUrl());
            e.printStackTrace();
          }
        }
      }
      toVisit.remove(0);
      focused();
    }
  }
Пример #5
0
 /**
  * Returns the URL held by the {@code faq} field.
  *
  * @return the value of {@code faq.getUrl()}
  */
 public String getFaqLink() {
   final String faqUrl = faq.getUrl();
   return faqUrl;
 }
Пример #6
0
 /**
  * Returns the URL held by the {@code demo} field.
  *
  * @return the value of {@code demo.getUrl()}
  */
 public String getDemoLink() {
   final String demoUrl = demo.getUrl();
   return demoUrl;
 }