コード例 #1
0
  /**
   * Parses a locally saved HTML page with Jaunt and, for every {@code <div class="result">}
   * found, prints the {@code href} of the first {@code <a href>} inside the div's first child
   * followed by that child's inner text (wrapped in square brackets).
   *
   * <p>Failures are reported on {@code System.err}; nothing is rethrown.
   *
   * @param args optional; when present, {@code args[0]} is the path of the HTML file to parse.
   *     Otherwise the original hard-coded path is used.
   */
  public static void main(String[] args) {
    String path =
        (args != null && args.length > 0) ? args[0] : "/home/vishnu/Desktop/indiankanoon.html";

    // Read the whole file. try-with-resources closes the reader even when readLine() fails.
    StringBuilder content = new StringBuilder();
    try (BufferedReader in = new BufferedReader(new FileReader(path))) {
      String line;
      while ((line = in.readLine()) != null) {
        // readLine() strips the terminator; put one back so that markup or text split across
        // lines is not fused together before parsing.
        content.append(line).append('\n');
      }
    } catch (IOException e) {
      // Parsing empty or truncated content would only produce misleading output.
      System.err.println("Unable to read " + path + ": " + e);
      return;
    }

    try {
      UserAgent userAgent = new UserAgent();
      userAgent.openContent(content.toString());
      Elements results = userAgent.doc.findEach("<div class=\"result\">");

      System.out.println("\n");
      try {
        for (Element result : results) {
          Element firstChild = result.getChildElements().get(0);
          String text = firstChild.innerText();
          String textUrl = (String) firstChild.findFirst("<a href>").getAt("href");
          System.out.println(textUrl);
          System.out.println("[" + text + "]");
        }
      } catch (Exception ex) {
        // Deliberately broad: a result div without children (IndexOutOfBoundsException) or
        // without a link (Jaunt search failure) stops the listing but is only reported.
        System.err.println(ex);
      }
    } catch (JauntException e) {
      System.err.println(e);
    }
  }
コード例 #2
0
ファイル: Example9.java プロジェクト: dipu/Scraping
  /**
   * Visits the Jaunt "food" example page and reports how many {@code <div>} elements three
   * different searches return: {@code findEvery} on the document, {@code findEach} on the
   * document, and {@code findEach} scoped to the first {@code <p class=meat>}.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    try {
      UserAgent agent = new UserAgent();
      agent.visit("http://jaunt-api.com/examples/food.htm");

      // All divs in the document.
      Elements everyDiv = agent.doc.findEvery("<div>");
      String everyReport = "Every div: " + everyDiv.size() + " results";
      System.out.println(everyReport);

      // Non-nested divs only.
      Elements eachDiv = agent.doc.findEach("<div>");
      String eachReport = "Each div: " + eachDiv.size() + " results";
      System.out.println(eachReport);

      // Non-nested divs, searched from the first <p class='meat'>.
      Elements meatDivs = agent.doc.findFirst("<p class=meat>").findEach("<div>");
      String meatReport = "Meat search: " + meatDivs.size() + " results";
      System.out.println(meatReport);
    } catch (JauntException failure) {
      System.out.println(failure);
    }
  }
コード例 #3
0
  /**
   * Visits the Jaunt sign-up example page, fills in the form by component label, submits it, and
   * prints the location the user agent ends up at.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    try {
      UserAgent agent = new UserAgent();
      agent.visit("http://jaunt-api.com/examples/signup.htm");
      Document signupPage = agent.doc;

      // Text field labelled "E-mail:".
      signupPage.fillout("E-mail:", "*****@*****.**");

      // Menu labelled "Account Type:" -> pick "advanced".
      signupPage.choose("Account Type:", "advanced");

      // Text area labelled "Comments:".
      signupPage.fillout("Comments:", "no comment");

      // Radio button whose label "No thanks" sits to its right.
      signupPage.choose(Label.RIGHT, "No thanks");

      // Press the submit button labelled 'create trial account'.
      signupPage.submit("create trial account");

      // Show the current location (url) after submitting.
      System.out.println(agent.getLocation());
    } catch (JauntException failure) {
      System.out.println(failure);
    }
  }
コード例 #4
0
  /**
   * Visits the Jaunt "hello" example page and prints the inner HTML of the results of four
   * {@code findEvery} queries that use regular expressions in the tag name, an attribute value,
   * and the child text.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    try {
      UserAgent userAgent = new UserAgent();
      userAgent.visit("http://jaunt-api.com/examples/hello.htm");

      // Every element whose tag name is div or span.
      Elements elements = userAgent.doc.findEvery("<div|span>");
      System.out.println("search results:\n" + elements.innerHTML());

      // Every p element whose id is 1 or 4.
      elements = userAgent.doc.findEvery("<p id=1|4>");
      System.out.println("search results:\n" + elements.innerHTML());

      // Every element (any tag name) with an id from 2 to 6.
      elements = userAgent.doc.findEvery("< id=[2-6]>");
      System.out.println("search results:\n" + elements.innerHTML());

      // Every p whose joined child text contains 'ho' (regex). The query must start with a
      // literal '<'; the HTML entity "&lt;" that was here previously is not a tag query.
      elements = userAgent.doc.findEvery("<p>ho");
      System.out.println("search results:\n" + elements.innerHTML());
    } catch (ResponseException e) {
      System.out.println(e);
    }
  }