Exemplo n.º 1
1
 /**
  * Fetches the detail page for the given deck and fills in its class cards, neutral cards and
  * description.
  *
  * <p>The page URL is {@code HPDeckSource.BASE_URL + hsDeck.getUrl()}. Card cells are read from the
  * {@code section.class-listing} and {@code section.neutral-listing} tables.
  *
  * @param hsDeck the deck to populate; it is modified in place
  * @param n value forwarded unchanged to {@code HtmlHelper.parseDescription}
  * @return the same {@code hsDeck} instance, also when an {@link IOException} interrupts loading
  */
 @Override
 public HSDeck getDeckDetail(final HSDeck hsDeck, final float n) {
   try {
     final Document page = Jsoup.connect(HPDeckSource.BASE_URL + hsDeck.getUrl()).get();

     final HashMap<String, String> classHsItemMap = new HashMap<String, String>();
     final ArrayList<String> classNames = new ArrayList<String>();
     collectCardCells(
         page.select("section.class-listing table.listing td.col-name"),
         classHsItemMap,
         classNames);
     hsDeck.setClassHsItemMap(classHsItemMap);
     hsDeck.setClassHsItemList(DataBaseManager.getInstance().getAllCardsByNames(classNames));

     final HashMap<String, String> neutralHsItemMap = new HashMap<String, String>();
     final ArrayList<String> neutralNames = new ArrayList<String>();
     collectCardCells(
         page.select("section.neutral-listing table.listing td.col-name"),
         neutralHsItemMap,
         neutralNames);
     hsDeck.setNeutralHsItemMap(neutralHsItemMap);
     hsDeck.setNeutralHsItemList(DataBaseManager.getInstance().getAllCardsByNames(neutralNames));

     hsDeck.setDescription(
         HtmlHelper.parseDescription(page.select("div.deck-description").html(), n, false));
     return hsDeck;
   } catch (IOException ex) {
     ex.printStackTrace();
     return hsDeck;
   }
 }

 /**
  * Reads one card per cell: the name is the text of the cell's first link and the stored value is
  * the last character of the cell's trimmed text. Cells lacking a link or any text are skipped
  * instead of raising an index error.
  *
  * @param cells the table cells to read
  * @param counts receives name-to-last-character entries
  * @param names receives the card names in document order
  */
 private void collectCardCells(
     final Elements cells, final HashMap<String, String> counts, final ArrayList<String> names) {
   for (int i = 0; i < cells.size(); ++i) {
     final Elements links = cells.get(i).select("a");
     final String cellText = cells.get(i).text().trim();
     if (links.isEmpty() || cellText.isEmpty()) {
       continue;
     }
     final String cardName = links.get(0).text();
     counts.put(cardName, cellText.substring(cellText.length() - 1));
     names.add(cardName);
   }
 }
Exemplo n.º 2
0
  /**
   * Exercises the {@code :has(...)} pseudo-selector on three divs: one wrapping a span inside a
   * paragraph, one holding a span with a class attribute, and one holding only a paragraph.
   */
  @Test
  public void testPseudoHas() {
    Document doc =
        Jsoup.parse(
            "<div id=0><p><span>Hello</span></p></div> <div id=1><span class=foo>There</span></div> <div id=2><p>Not</p></div>");

    Elements divs1 = doc.select("div:has(span)");
    assertEquals(2, divs1.size());
    assertEquals("0", divs1.get(0).id());
    assertEquals("1", divs1.get(1).id());

    // The :has(...) argument must be closed with a matching parenthesis.
    Elements divs2 = doc.select("div:has([class])");
    assertEquals(1, divs2.size());
    assertEquals("1", divs2.get(0).id());

    Elements divs3 = doc.select("div:has(span, p)");
    assertEquals(3, divs3.size());
    assertEquals("0", divs3.get(0).id());
    assertEquals("1", divs3.get(1).id());
    assertEquals("2", divs3.get(2).id());

    Elements els1 = doc.body().select(":has(p)");
    assertEquals(3, els1.size()); // body, div, div
    assertEquals("body", els1.first().tagName());
    assertEquals("0", els1.get(1).id());
    assertEquals("2", els1.get(2).id());
  }
Exemplo n.º 3
0
 /**
  * Checks that {@code div p:gt(0)} yields only the second and third paragraphs of the first div.
  */
 @Test
 public void testPseudoGreaterThan() {
   final Document parsed =
       Jsoup.parse("<div><p>One</p><p>Two</p><p>Three</p></div><div><p>Four</p>");
   final Elements matched = parsed.select("div p:gt(0)");

   final String[] expectedTexts = {"Two", "Three"};
   assertEquals(expectedTexts.length, matched.size());
   for (int i = 0; i < expectedTexts.length; i++) {
     assertEquals(expectedTexts[i], matched.get(i).text());
   }
 }
Exemplo n.º 4
0
 /**
  * Checks that {@code div p:lt(2)} keeps the first two paragraphs of the first div plus the single
  * paragraph of the second div.
  */
 @Test
 public void testPseudoLessThan() {
   // The third paragraph is closed with a well-formed </p> tag.
   Document doc = Jsoup.parse("<div><p>One</p><p>Two</p><p>Three</p></div><div><p>Four</p>");
   Elements ps = doc.select("div p:lt(2)");
   assertEquals(3, ps.size());
   assertEquals("One", ps.get(0).text());
   assertEquals("Two", ps.get(1).text());
   assertEquals("Four", ps.get(2).text());
 }
Exemplo n.º 5
0
 /** Checks that {@code li + li} on a three-item list yields the second and third items. */
 @Test
 public void adjacentSiblings() {
   final String markup = "<ol><li>One<li>Two<li>Three</ol>";
   final Document parsed = Jsoup.parse(markup);
   final Elements followers = parsed.select("li + li");

   final String[] expectedTexts = {"Two", "Three"};
   assertEquals(expectedTexts.length, followers.size());
   for (int i = 0; i < expectedTexts.length; i++) {
     assertEquals(expectedTexts[i], followers.get(i).text());
   }
 }
Exemplo n.º 6
0
  /**
   * Checks that the parents of the two paragraphs are reported as div, body and html, in that
   * order.
   */
  @Test
  public void parents() {
    final Document parsed = Jsoup.parse("<div><p>Hello</p></div><p>There</p>");
    final Elements ancestors = parsed.select("p").parents();

    final String[] expectedTags = {"div", "body", "html"};
    assertEquals(expectedTags.length, ancestors.size());
    for (int i = 0; i < expectedTags.length; i++) {
      assertEquals(expectedTags[i], ancestors.get(i).tagName());
    }
  }
Exemplo n.º 7
0
 /** Checks that {@code div > *} yields the two paragraphs of the first div and the span. */
 @Test
 public void parentChildStar() {
   final String markup =
       "<div id=1><p>Hello<p><b>there</b></p></div><div id=2><span>Hi</span></div>";
   final Document parsed = Jsoup.parse(markup);
   final Elements children = parsed.select("div > *");

   final String[] expectedTags = {"p", "p", "span"};
   assertEquals(expectedTags.length, children.size());
   for (int i = 0; i < expectedTags.length; i++) {
     assertEquals(expectedTags[i], children.get(i).tagName());
   }
 }
Exemplo n.º 8
0
  /**
   * Checks that the grouped query {@code [id],[title=foo]} yields both divs with an id and the div
   * titled "foo".
   */
  @Test
  public void testGroupOrAttribute() {
    final String markup = "<div id=1 /><div id=2 /><div title=foo /><div title=bar />";
    final Elements matched = Jsoup.parse(markup).select("[id],[title=foo]");

    assertEquals(3, matched.size());
    final String[] expectedIds = {"1", "2"};
    for (int i = 0; i < expectedIds.length; i++) {
      assertEquals(expectedIds[i], matched.get(i).id());
    }
    assertEquals("foo", matched.get(2).attr("title"));
  }
Exemplo n.º 9
0
  /**
   * Checks that {@code body > div.foo li + li} yields the second and third list items of the
   * fixture.
   */
  @Test
  public void mixCombinator() {
    final String markup = "<div class=foo><ol><li>One<li>Two<li>Three</ol></div>";
    final Document parsed = Jsoup.parse(markup);
    final Elements followers = parsed.select("body > div.foo li + li");

    assertEquals(2, followers.size());
    final String second = followers.get(0).text();
    assertEquals("Two", second);
    final String third = followers.get(1).text();
    assertEquals("Three", third);
  }
Exemplo n.º 10
0
 /**
  * Checks that the regex attribute query {@code img[src~=(?i)\.(png|jpe?g)]} yields the images
  * with ids 1, 2 and 3 from the fixture.
  */
 @Test
 public void testByAttributeRegex() {
   final String markup =
       "<p><img src=foo.png id=1><img src=bar.jpg id=2><img src=qux.JPEG id=3><img src=old.gif><img></p>";
   final Document parsed = Jsoup.parse(markup);
   final Elements images = parsed.select("img[src~=(?i)\\.(png|jpe?g)]");

   final String[] expectedIds = {"1", "2", "3"};
   assertEquals(expectedIds.length, images.size());
   for (int i = 0; i < expectedIds.length; i++) {
     assertEquals(expectedIds[i], images.get(i).id());
   }
 }
Exemplo n.º 11
0
  /**
   * Checks that {@code #foo} yields both paragraphs sharing that id, and nothing when no element
   * carries it.
   */
  @Test
  public void testById() {
    final String duplicatedIdMarkup = "<div><p id=foo>Hello</p><p id=foo>Foo two!</p></div>";
    final Elements matched = Jsoup.parse(duplicatedIdMarkup).select("#foo");
    final String[] expectedTexts = {"Hello", "Foo two!"};
    assertEquals(expectedTexts.length, matched.size());
    for (int i = 0; i < expectedTexts.length; i++) {
      assertEquals(expectedTexts[i], matched.get(i).text());
    }

    final Elements unmatched = Jsoup.parse("<div id=1></div>").select("#foo");
    assertEquals(0, unmatched.size());
  }
Exemplo n.º 12
0
  /**
   * Checks that the grouped query {@code .foo > ol, ol > li + li} yields the list followed by its
   * second and third items.
   */
  @Test
  public void mixCombinatorGroup() {
    final String markup = "<div class=foo><ol><li>One<li>Two<li>Three</ol></div>";
    final Document parsed = Jsoup.parse(markup);
    final Elements matched = parsed.select(".foo > ol, ol > li + li");

    assertEquals(3, matched.size());
    assertEquals("ol", matched.get(0).tagName());
    final String[] expectedItemTexts = {"Two", "Three"};
    for (int i = 0; i < expectedItemTexts.length; i++) {
      assertEquals(expectedItemTexts[i], matched.get(i + 1).text());
    }
  }
Exemplo n.º 13
0
  /**
   * Checks prepend, append and html applied to a set of paragraphs by inspecting the second
   * paragraph's outer HTML after each step.
   */
  @Test
  public void setHtml() {
    final Document parsed = Jsoup.parse("<p>One</p><p>Two</p><p>Three</p>");
    final Elements paragraphs = parsed.select("p");

    // Same call chain as a fluent one-liner: append runs on whatever prepend returns.
    final Elements afterPrepend = paragraphs.prepend("<b>Bold</b>");
    afterPrepend.append("<i>Ital</i>");
    final String decorated = TextUtil.stripNewlines(paragraphs.get(1).outerHtml());
    assertEquals("<p><b>Bold</b>Two<i>Ital</i></p>", decorated);

    paragraphs.html("<span>Gone</span>");
    final String replaced = TextUtil.stripNewlines(paragraphs.get(1).outerHtml());
    assertEquals("<p><span>Gone</span></p>", replaced);
  }
Exemplo n.º 14
0
  /**
   * Checks that {@code div p:eq(0)} yields the first paragraph of each div, and that
   * {@code div:eq(0) p:eq(0)} narrows the result to the first paragraph of the first div.
   */
  @Test
  public void testPseudoEquals() {
    // The third paragraph is closed with a well-formed </p> tag.
    Document doc = Jsoup.parse("<div><p>One</p><p>Two</p><p>Three</p></div><div><p>Four</p>");
    Elements ps = doc.select("div p:eq(0)");
    assertEquals(2, ps.size());
    assertEquals("One", ps.get(0).text());
    assertEquals("Four", ps.get(1).text());

    Elements ps2 = doc.select("div:eq(0) p:eq(0)");
    assertEquals(1, ps2.size());
    assertEquals("One", ps2.get(0).text());
    assertEquals("p", ps2.get(0).tagName());
  }
Exemplo n.º 15
0
  /**
   * Checks the child combinator on nested divs: {@code div > div} yields divs 2 and 3, while
   * {@code div#1 > div} yields only div 2.
   */
  @Test
  public void parentChildElement() {
    final String markup = "<div id=1><div id=2><div id = 3></div></div></div><div id=4></div>";
    final Document parsed = Jsoup.parse(markup);

    final Elements nested = parsed.select("div > div");
    // Div 2 sits directly inside div 1, and div 3 directly inside div 2.
    final String[] expectedNestedIds = {"2", "3"};
    assertEquals(expectedNestedIds.length, nested.size());
    for (int i = 0; i < expectedNestedIds.length; i++) {
      assertEquals(expectedNestedIds[i], nested.get(i).id());
    }

    final Elements childrenOfFirst = parsed.select("div#1 > div");
    assertEquals(1, childrenOfFirst.size());
    assertEquals("2", childrenOfFirst.get(0).id());
  }
Exemplo n.º 16
0
  /**
   * Checks hasClass, addClass, removeClass and toggleClass when applied to a set of two
   * paragraphs, then verifies the resulting class names of each paragraph.
   */
  @Test
  public void classes() {
    final String markup = "<div><p class='mellow yellow'></p><p class='red green'></p>";
    final Document parsed = Jsoup.parse(markup);
    final Elements paragraphs = parsed.select("p");

    assertTrue(paragraphs.hasClass("red"));
    assertFalse(paragraphs.hasClass("blue"));

    paragraphs.addClass("blue");
    paragraphs.removeClass("yellow");
    paragraphs.toggleClass("mellow");

    final String firstClasses = paragraphs.get(0).className();
    assertEquals("blue", firstClasses);
    final String secondClasses = paragraphs.get(1).className();
    assertEquals("red green blue mellow", secondClasses);
  }
Exemplo n.º 17
0
  /**
   * Checks that an upper-case tag query matches divs written in either case, and that a query
   * for an absent tag yields nothing.
   */
  @Test
  public void testByTag() {
    // Tag matching is expected to ignore case.
    final String mixedCaseMarkup = "<div id=1><div id=2><p>Hello</p></div></div><DIV id=3>";
    final Elements divs = Jsoup.parse(mixedCaseMarkup).select("DIV");
    final String[] expectedIds = {"1", "2", "3"};
    assertEquals(expectedIds.length, divs.size());
    for (int i = 0; i < expectedIds.length; i++) {
      assertEquals(expectedIds[i], divs.get(i).id());
    }

    final String lowerCaseMarkup = "<div id=1><div id=2><p>Hello</p></div></div><div id=3>";
    final Elements spans = Jsoup.parse(lowerCaseMarkup).select("span");
    assertEquals(0, spans.size());
  }
Exemplo n.º 18
0
  /**
   * Checks that commas inside an attribute value or a {@code :matches(...)} pattern are not
   * treated as group separators.
   */
  @Test
  public void handlesCommasInSelector() {
    final String markup =
        "<p name='1,2'>One</p><div>Two</div><ol><li>123</li><li>Text</li></ol>";
    final Document parsed = Jsoup.parse(markup);

    final Elements byName = parsed.select("[name=1,2]");
    assertEquals(1, byName.size());

    final Elements grouped = parsed.select("div, li:matches([0-9,]+)");
    final String[] expectedTags = {"div", "li"};
    assertEquals(expectedTags.length, grouped.size());
    for (int i = 0; i < expectedTags.length; i++) {
      assertEquals(expectedTags[i], grouped.get(i).tagName());
    }
    assertEquals("123", grouped.get(1).text());
  }
Exemplo n.º 19
0
  /**
   * Looks up an element by ID, searching this element and everything collected beneath it.
   *
   * <p>The first match found from this element is returned, so a search started from another
   * element may produce a different result. To look up an ID across a whole Document, use
   * {@link Document#getElementById(String)}.
   *
   * @param id the ID to look for; checked with {@code Validate.notEmpty}
   * @return the first element with a matching ID, or null when nothing matches
   */
  public Element getElementById(String id) {
    Validate.notEmpty(id);

    final Elements matches = Collector.collect(new Evaluator.Id(id), this);
    return matches.isEmpty() ? null : matches.get(0);
  }
Exemplo n.º 20
0
 /** Checks that {@code li#1 + li#2} yields exactly the list item with id 2. */
 @Test
 public void adjacentSiblingsWithId() {
   final String markup = "<ol><li id=1>One<li id=2>Two<li id=3>Three</ol>";
   final Document parsed = Jsoup.parse(markup);
   final Elements followers = parsed.select("li#1 + li#2");

   assertEquals(1, followers.size());
   final String onlyText = followers.get(0).text();
   assertEquals("Two", onlyText);
 }
Exemplo n.º 21
0
 /**
  * Checks that {@code div.foo p:gt(0)} yields only the second paragraph of the div with class
  * "foo".
  */
 @Test
 public void testPseudoCombined() {
   final String markup =
       "<div class='foo'><p>One</p><p>Two</p></div><div><p>Three</p><p>Four</p></div>";
   final Document parsed = Jsoup.parse(markup);
   final Elements matched = parsed.select("div.foo p:gt(0)");

   assertEquals(1, matched.size());
   final String onlyText = matched.get(0).text();
   assertEquals("Two", onlyText);
 }
Exemplo n.º 22
0
 /**
  * Checks that the regex attribute query {@code img[src~=[o]]} yields the images with ids 1 and 4
  * from the fixture.
  */
 @Test
 public void testByAttributeRegexCharacterClass() {
   final String markup =
       "<p><img src=foo.png id=1><img src=bar.jpg id=2><img src=qux.JPEG id=3><img src=old.gif id=4></p>";
   final Document parsed = Jsoup.parse(markup);
   final Elements images = parsed.select("img[src~=[o]]");

   final String[] expectedIds = {"1", "4"};
   assertEquals(expectedIds.length, images.size());
   for (int i = 0; i < expectedIds.length; i++) {
     assertEquals(expectedIds[i], images.get(i).id());
   }
 }
Exemplo n.º 23
0
 /**
  * Checks that selecting {@code p} within the {@code .headline} results yields only the two
  * paragraphs inside a headline div.
  */
 @Test
 public void filter() {
   final String markup =
       "<p>Excl</p><div class=headline><p>Hello</p><p>There</p></div><div class=headline><h1>Headline</h1></div>";
   final Document parsed = Jsoup.parse(markup);
   final Elements headlines = parsed.select(".headline");
   final Elements paragraphs = headlines.select("p");

   final String[] expectedTexts = {"Hello", "There"};
   assertEquals(expectedTexts.length, paragraphs.size());
   for (int i = 0; i < expectedTexts.length; i++) {
     assertEquals(expectedTexts[i], paragraphs.get(i).text());
   }
 }
Exemplo n.º 24
0
  /**
   * Checks class attribute queries with a trailing space: the unquoted and quoted forms both
   * match the two divs, while the form with an escaped space matches none.
   */
  @Test
  public void selectClassWithSpace() {
    final String markup =
        "<div class=\"value\">class without space</div>\n"
            + "<div class=\"value \">class with space</div>";
    final Document parsed = Jsoup.parse(markup);

    // Both spellings are expected to give the same two hits, in document order.
    final String[] matchingQueries = {"div[class=value ]", "div[class=\"value \"]"};
    for (String query : matchingQueries) {
      final Elements hits = parsed.select(query);
      assertEquals(2, hits.size());
      assertEquals("class without space", hits.get(0).text());
      assertEquals("class with space", hits.get(1).text());
    }

    final Elements escapedHits = parsed.select("div[class=\"value\\ \"]");
    assertEquals(0, escapedHits.size());
  }
Exemplo n.º 25
0
  /**
   * Checks descendant queries run from the element found via {@code getElementsByClass("HEAD")}:
   * queries within it match, while a query naming an ancestor of it matches nothing.
   */
  @Test
  public void descendant() {
    final String markup = "<div class=head><p class=first>Hello</p><p>There</p></div><p>None</p>";
    final Document parsed = Jsoup.parse(markup);
    final Element head = parsed.getElementsByClass("HEAD").first();

    final Elements paragraphs = head.select(".head p");
    final String[] expectedTexts = {"Hello", "There"};
    assertEquals(expectedTexts.length, paragraphs.size());
    for (int i = 0; i < expectedTexts.length; i++) {
      assertEquals(expectedTexts[i], paragraphs.get(i).text());
    }

    final Elements firstParagraph = head.select("p.first");
    assertEquals(1, firstParagraph.size());
    assertEquals("Hello", firstParagraph.get(0).text());

    // "p .first" asks for a .first element below a p, not the p itself, so nothing matches.
    final Elements belowParagraph = head.select("p .first");
    assertEquals(0, belowParagraph.size());

    final Elements fromBody = head.select("body div.head");
    assertEquals(0, fromBody.size());
  }
Exemplo n.º 26
0
  /**
   * Checks {@code :containsData(...)} against a paragraph, a script and a span holding a comment,
   * all queried from the document body.
   */
  @Test
  public void containsData() {
    final String markup = "<p>jsoup</p><script>jsoup</script><span><!-- comments --></span>";
    final Element body = Jsoup.parse(markup).body();

    final Elements anyWithJsoup = body.select(":containsData(jsoup)");
    final Elements scriptsWithJsoup = body.select("script:containsData(jsoup)");
    final Elements spansWithComments = body.select("span:containsData(comments)");
    final Elements anyWithS = body.select(":containsData(s)");

    assertEquals(2, anyWithJsoup.size()); // body and script
    assertEquals(1, scriptsWithJsoup.size());
    assertEquals(anyWithJsoup.last(), scriptsWithJsoup.first());
    assertEquals("<script>jsoup</script>", scriptsWithJsoup.outerHtml());

    assertEquals(1, spansWithComments.size());
    assertEquals("span", spansWithComments.first().tagName());

    assertEquals(3, anyWithS.size());
    assertEquals("body", anyWithS.first().tagName());
    final String[] expectedRemainingTags = {"script", "span"};
    for (int i = 0; i < expectedRemainingTags.length; i++) {
      assertEquals(expectedRemainingTags[i], anyWithS.get(i + 1).tagName());
    }
  }
Exemplo n.º 27
0
  /**
   * Checks class queries written in a different case than the markup: {@code P.One} matches the
   * paragraphs with ids 0 and 1, {@code .foo} matches nothing, and {@code .one-two} matches the
   * div with class "One-Two".
   */
  @Test
  public void testByClass() {
    final String paragraphMarkup =
        "<p id=0 class='ONE two'><p id=1 class='one'><p id=2 class='two'>";
    final Elements paragraphs = Jsoup.parse(paragraphMarkup).select("P.One");
    final String[] expectedIds = {"0", "1"};
    assertEquals(expectedIds.length, paragraphs.size());
    for (int i = 0; i < expectedIds.length; i++) {
      assertEquals(expectedIds[i], paragraphs.get(i).id());
    }

    final Elements unmatched = Jsoup.parse("<div class='one'></div>").select(".foo");
    assertEquals(0, unmatched.size());

    final Elements hyphenated = Jsoup.parse("<div class='One-Two'></div>").select(".one-two");
    assertEquals(1, hyphenated.size());
  }
Exemplo n.º 28
0
  /**
   * Checks that the grouped query {@code p,div,[title]} yields the three divs, the paragraph and
   * the titled span, and inspects the title attribute of each div.
   */
  @Test
  public void testGroupOr() {
    final String markup =
        "<div title=foo /><div title=bar /><div /><p></p><img /><span title=qux>";
    final Document parsed = Jsoup.parse(markup);
    final Elements matched = parsed.select("p,div,[title]");

    assertEquals(5, matched.size());

    final String[] expectedDivTitles = {"foo", "bar"};
    for (int i = 0; i < expectedDivTitles.length; i++) {
      assertEquals("div", matched.get(i).tagName());
      assertEquals(expectedDivTitles[i], matched.get(i).attr("title"));
    }

    assertEquals("div", matched.get(2).tagName());
    // An attribute that is not present is read back as an empty string.
    assertTrue(matched.get(2).attr("title").isEmpty());
    assertFalse(matched.get(2).hasAttr("title"));

    assertEquals("p", matched.get(3).tagName());
    assertEquals("span", matched.get(4).tagName());
  }
Exemplo n.º 29
0
  /**
   * Downloads the sample table page and prints the text of the second cell of every row inside
   * the element with id {@code datatable}.
   *
   * <p>Nothing is printed to standard output when the page cannot be downloaded or the table
   * element is absent; both situations are reported on standard error instead.
   */
  public Scraper() {
    final Document doc;
    try {
      doc =
          Jsoup.connect(
                  "http://www.geog.leeds.ac.uk/courses/other/programming/practicals/general/web/scraping-intro/table.html")
              .get();
    } catch (IOException ioe) {
      ioe.printStackTrace();
      return; // no document was loaded, so there is nothing to read
    }

    final Element table = doc.getElementById("datatable");
    if (table == null) {
      System.err.println("No element with id 'datatable' found in the downloaded page");
      return;
    }

    for (Element row : table.getElementsByTag("TR")) {
      final Elements tds = row.getElementsByTag("TD");
      // Only the second cell is of interest; rows with fewer cells are skipped.
      if (tds.size() > 1) {
        System.out.println(tds.get(1).text());
      }
    }
  }
Exemplo n.º 30
0
  /**
   * Downloads the staff directory page and prints the text of the second cell of every row inside
   * the element with id {@code mytable}.
   *
   * <p>Nothing is printed to standard output when the page cannot be downloaded or the table
   * element is absent; both situations are reported on standard error instead.
   *
   * @param args command-line arguments; not used
   */
  public static void main(String[] args) {
    final Document doc;
    try {
      // get page
      doc =
          Jsoup.connect("http://fskm.uitm.edu.my/v1/fakulti/staff-directory/academic/1097.html")
              .get();
    } catch (IOException ex) {
      ex.printStackTrace();
      return; // no document was loaded, so there is nothing to read
    }

    // Get Element with specific ID
    final Element table = doc.getElementById("mytable");
    if (table == null) {
      System.err.println("No element with id 'mytable' found in the downloaded page");
      return;
    }

    // Get text inside Element
    for (Element row : table.getElementsByTag("TR")) {
      final Elements tds = row.getElementsByTag("TD");
      // Only the second cell is of interest; rows with fewer cells are skipped.
      if (tds.size() > 1) {
        System.out.println(tds.get(1).text());
      }
    }
  }