示例#1
1
 /**
  * Downloads the detail page of the given deck and fills in its class cards, neutral cards and
  * description.
  *
  * <p>If the page cannot be fetched, the stack trace is printed and the deck is returned without
  * the details populated.
  *
  * @param hsDeck deck to populate; its URL is appended to {@code HPDeckSource.BASE_URL}
  * @param n passed through unchanged to {@code HtmlHelper.parseDescription}
  * @return the same {@code hsDeck} instance
  */
 @Override
 public HSDeck getDeckDetail(final HSDeck hsDeck, final float n) {
   try {
     final Document page = Jsoup.connect(HPDeckSource.BASE_URL + hsDeck.getUrl()).get();

     final HashMap<String, String> classHsItemMap = new HashMap<String, String>();
     final ArrayList<String> classCardNames = new ArrayList<String>();
     collectCardCells(
         page.select("section.class-listing table.listing td.col-name"),
         classHsItemMap,
         classCardNames);
     hsDeck.setClassHsItemMap(classHsItemMap);
     hsDeck.setClassHsItemList(DataBaseManager.getInstance().getAllCardsByNames(classCardNames));

     final HashMap<String, String> neutralHsItemMap = new HashMap<String, String>();
     final ArrayList<String> neutralCardNames = new ArrayList<String>();
     collectCardCells(
         page.select("section.neutral-listing table.listing td.col-name"),
         neutralHsItemMap,
         neutralCardNames);
     hsDeck.setNeutralHsItemMap(neutralHsItemMap);
     hsDeck.setNeutralHsItemList(
         DataBaseManager.getInstance().getAllCardsByNames(neutralCardNames));

     hsDeck.setDescription(
         HtmlHelper.parseDescription(page.select("div.deck-description").html(), n, false));
     return hsDeck;
   } catch (IOException ex) {
     // Best effort: the caller still gets the deck it passed in, just without details.
     ex.printStackTrace();
     return hsDeck;
   }
 }

 /**
  * Reads card cells into {@code lastCharByName} (card name to the last character of the trimmed
  * cell text) and appends each card name to {@code names} in document order.
  *
  * <p>Cells without an anchor or without any text are skipped instead of raising an
  * index-out-of-bounds error.
  */
 private void collectCardCells(
     final Elements cells,
     final HashMap<String, String> lastCharByName,
     final ArrayList<String> names) {
   for (int i = 0; i < cells.size(); ++i) {
     final Elements anchors = cells.get(i).select("a");
     final String cellText = cells.get(i).text().trim();
     if (anchors.isEmpty() || cellText.isEmpty()) {
       continue;
     }
     final String cardName = anchors.get(0).text();
     // NOTE(review): the trailing character is presumably the card count -- confirm.
     lastCharByName.put(cardName, cellText.substring(cellText.length() - 1));
     names.add(cardName);
   }
 }
示例#2
0
  /**
   * Checks that {@code dataset()} exposes only the {@code data-} prefixed attributes of an element
   * and that changes made through the dataset map or through the attributes are visible in both.
   */
  @Test
  public void dataset() {
    String html =
        "<div id=1 data-name=jsoup class=new data-package=jar>Hello</div><p id=2>Hello</p>";
    Document doc = Jsoup.parse(html);
    Element divEl = doc.select("div").first();
    Map<String, String> data = divEl.dataset();
    Attributes attrs = divEl.attributes();

    // Initial view: only the two data- attributes, keyed without the prefix.
    assertEquals(2, data.size());
    assertEquals("jsoup", data.get("name"));
    assertEquals("jar", data.get("package"));

    // Update, add and remove through the dataset map.
    data.put("name", "jsoup updated");
    data.put("language", "java");
    data.remove("package");

    assertEquals(2, data.size());
    assertEquals(4, attrs.size());
    assertEquals("jsoup updated", attrs.get("data-name"));
    assertEquals("jsoup updated", data.get("name"));
    assertEquals("java", attrs.get("data-language"));
    assertEquals("java", data.get("language"));

    // Adding through the attributes shows up in the dataset.
    attrs.put("data-food", "bacon");
    assertEquals(3, data.size());
    assertEquals("bacon", data.get("food"));

    // A bare "data-" key is not treated as a data attribute.
    attrs.put("data-", "empty");
    assertEquals(null, data.get(""));

    // An element without data- attributes has an empty dataset.
    Element paragraph = doc.select("p").first();
    assertEquals(0, paragraph.dataset().size());
  }
示例#3
0
  /**
   * Checks {@code insertChildren} with negative indexes: -1 places the nodes at the end, -2 places
   * them just before the last child, and inserted nodes are re-parented.
   */
  @Test
  public void insertChildrenAtPosition() {
    Document doc =
        Jsoup.parse(
            "<div id=1>Text1 <p>One</p> Text2 <p>Two</p></div><div id=2>Text3 <p>Three</p></div>");
    Elements divs = doc.select("div");
    Element source = divs.get(0);
    Elements movedPs = source.select("p");
    Element target = divs.get(1);

    assertEquals(2, target.childNodeSize());
    target.insertChildren(-1, movedPs);
    assertEquals(2, source.childNodeSize()); // the two paragraphs were moved out
    assertEquals(4, target.childNodeSize());
    assertEquals(3, movedPs.get(1).siblingIndex()); // ends up as the last child

    // Three parentless nodes: two spans and a text node.
    Element spanOne = new Element(Tag.valueOf("span"), "").text("Span1");
    Element spanTwo = new Element(Tag.valueOf("span"), "").text("Span2");
    TextNode text = new TextNode("Text4", "");
    List<Node> fresh = new ArrayList<Node>();
    fresh.add(spanOne);
    fresh.add(spanTwo);
    fresh.add(text);

    assertNull(spanOne.parent());
    target.insertChildren(-2, fresh);
    assertEquals(target, spanOne.parent());
    assertEquals(7, target.childNodeSize());
    assertEquals(3, spanOne.siblingIndex());
    assertEquals(4, spanTwo.siblingIndex());
    assertEquals(5, text.siblingIndex());
  }
示例#4
0
  /**
   * Fetches {@code mapUrl} and returns the absolute targets of its anchors, keeping only links
   * that contain {@code base} and do not contain {@code "index"}.
   *
   * <p>Links to other domains are dropped by the {@code base} check. Links containing "index" are
   * dropped so that index pages do not send a crawler into an endless loop.
   *
   * @param mapUrl URL of the page to scan
   * @param base substring a link must contain to be kept
   * @return the kept links (as {@code String}s) in document order, or {@code null} if anything
   *     fails while fetching or scanning the page
   */
  public static List genSitemap(String mapUrl, String base) {
    try {
      Document doc = Jsoup.connect(mapUrl).get();
      List<String> stringLinks = new ArrayList<String>();
      for (Element link : doc.select("a")) {
        String href = link.attr("abs:href");
        if (href.contains(base) && !href.contains("index")) {
          stringLinks.add(href);
        }
      }
      return stringLinks;
    } catch (Exception e) {
      // Deliberately best effort: callers receive null when the page cannot be processed.
      return null;
    }
  }
示例#5
0
 /**
  * With the indent amount set to 0, the document HTML must contain the outer HTML of both the
  * title and the div selections.
  */
 @Test
 public void testHtmlContainsOuter() {
   Document doc = Jsoup.parse("<title>Check</title> <div>Hello there</div>");
   doc.outputSettings().indentAmount(0);

   String titleOuter = doc.select("title").outerHtml();
   assertTrue(doc.html().contains(titleOuter));

   String divOuter = doc.select("div").outerHtml();
   assertTrue(doc.html().contains(divOuter));
 }
示例#6
0
 /** Renaming an element's tag must change which selectors match it and how it is serialized. */
 @Test
 public void testTagNameSet() {
   Document doc = Jsoup.parse("<div><i>Hello</i>");
   Element italic = doc.select("i").first();
   italic.tagName("em");

   assertEquals(0, doc.select("i").size());
   assertEquals(1, doc.select("em").size());
   String divHtml = doc.select("div").first().html();
   assertEquals("<em>Hello</em>", divHtml);
 }
示例#7
0
  /**
   * {@code hasText()} must be true for an element with text (directly or through a child) and
   * false for an empty paragraph.
   */
  @Test
  public void testHasText() {
    Document doc = Jsoup.parse("<div><p>Hello</p><p></p></div>");
    Element container = doc.select("div").first();
    Elements paragraphs = doc.select("p");
    Element filled = paragraphs.first();
    Element empty = paragraphs.last();

    assertTrue(container.hasText());
    assertTrue(filled.hasText());
    assertFalse(empty.hasText());
  }
示例#8
0
  /** Setting an element's text must replace all of its children, including nested elements. */
  @Test
  public void testSetText() {
    Document doc = Jsoup.parse("<div id=1>Hello <p>there <b>now</b></p></div>");
    assertEquals("Hello there now", doc.text()); // need to sort out node whitespace
    Element paragraph = doc.select("p").get(0);
    assertEquals("there now", paragraph.text());

    Element replaced = doc.getElementById("1").text("Gone");
    assertEquals("Gone", replaced.text());
    // The nested paragraph no longer exists in the document.
    assertEquals(0, doc.select("p").size());
  }
示例#9
0
  /**
   * {@code textNodes()} must return only the text nodes that are direct children, with their
   * whitespace intact, and an empty list for an element without text children.
   */
  @Test
  public void testGetTextNodes() {
    Document doc = Jsoup.parse("<p>One <span>Two</span> Three <br> Four</p>");
    Element paragraph = doc.select("p").first();
    List<TextNode> ownTexts = paragraph.textNodes();

    // "Two" sits inside the span, so it is not among the paragraph's own text nodes.
    assertEquals(3, ownTexts.size());
    assertEquals("One ", ownTexts.get(0).text());
    assertEquals(" Three ", ownTexts.get(1).text());
    assertEquals(" Four", ownTexts.get(2).text());

    Element lineBreak = doc.select("br").first();
    assertEquals(0, lineBreak.textNodes().size());
  }
示例#10
0
 /** Pins the pretty-printed output of a title, a div containing paragraphs, and the body. */
 @Test
 public void testContainerOutput() {
   String html =
       "<title>Hello there</title> <div><p>Hello</p><p>there</p></div> <div>Another</div>";
   Document doc = Jsoup.parse(html);

   String titleOuter = doc.select("title").first().outerHtml();
   assertEquals("<title>Hello there</title>", titleOuter);

   String firstDivOuter = doc.select("div").first().outerHtml();
   assertEquals("<div>\n <p>Hello</p>\n <p>there</p>\n</div>", firstDivOuter);

   String bodyHtml = doc.select("body").first().html();
   assertEquals(
       "<div>\n <p>Hello</p>\n <p>there</p>\n</div> \n<div>\n Another\n</div>", bodyHtml);
 }
示例#11
0
  /**
   * {@code after(html)} must insert the parsed nodes as siblings directly after the element, for
   * both a middle child and the last child.
   */
  @Test
  public void after() {
    Document doc = Jsoup.parse("<div><p>Hello</p><p>There</p></div>");

    Element firstParagraph = doc.select("p").first();
    firstParagraph.after("<div>one</div><div>two</div>");
    String afterFirstInsert = TextUtil.stripNewlines(doc.body().html());
    assertEquals(
        "<div><p>Hello</p><div>one</div><div>two</div><p>There</p></div>", afterFirstInsert);

    // Second insert targets the last paragraph and includes a comment node.
    Element lastParagraph = doc.select("p").last();
    lastParagraph.after("<p>Three</p><!-- four -->");
    String afterSecondInsert = TextUtil.stripNewlines(doc.body().html());
    assertEquals(
        "<div><p>Hello</p><div>one</div><div>two</div><p>There</p><p>Three</p><!-- four --></div>",
        afterSecondInsert);
  }
示例#12
0
  /**
   * Checks the selector produced by {@code cssSelector()} for an element with an id, a plain
   * element and an element with classes, and that each selector resolves back to the very same
   * element instance.
   */
  @Test
  public void testCssPath() {
    Document doc = Jsoup.parse("<div id=\"id1\">A</div><div>B</div><div class=\"c1 c2\">C</div>");
    Element divA = doc.select("div").get(0);
    Element divB = doc.select("div").get(1);
    Element divC = doc.select("div").get(2);

    // Expected value goes first so that failure messages label the values correctly.
    assertEquals("#id1", divA.cssSelector());
    assertEquals("html > body > div:nth-child(2)", divB.cssSelector());
    assertEquals("html > body > div.c1.c2", divC.cssSelector());

    // Identity comparison: the selector must find the same instance, not merely an equal one.
    assertTrue(divA == doc.select(divA.cssSelector()).first());
    assertTrue(divB == doc.select(divB.cssSelector()).first());
    assertTrue(divC == doc.select(divC.cssSelector()).first());
  }
示例#13
0
  /**
   * Inserting cloned elements must leave the originals in place and unmodified while the clones
   * (including edits made to them) appear in the target.
   */
  @Test
  public void insertChildrenAsCopy() {
    Document doc = Jsoup.parse("<div id=1>Text <p>One</p> Text <p>Two</p></div><div id=2></div>");
    Elements divs = doc.select("div");
    Element source = divs.get(0);
    Element target = divs.get(1);

    Elements clonedPs = doc.select("p").clone();
    clonedPs.first().text("One cloned");
    target.insertChildren(-1, clonedPs);

    assertEquals(4, source.childNodeSize()); // not moved -- cloned
    assertEquals(2, target.childNodeSize());
    String bodyHtml = TextUtil.stripNewlines(doc.body().html());
    assertEquals(
        "<div id=\"1\">Text <p>One</p> Text <p>Two</p></div><div id=\"2\"><p>One cloned</p><p>Two</p></div>",
        bodyHtml);
  }
示例#14
0
 /**
  * {@code text()} must include the text of child elements while {@code ownText()} covers only
  * the element's own text.
  */
 @Test
 public void testGetChildText() {
   Document doc = Jsoup.parse("<p>Hello <b>there</b> now");
   Element paragraph = doc.select("p").first();

   String allText = paragraph.text();
   String directText = paragraph.ownText();
   assertEquals("Hello there now", allText);
   assertEquals("Hello now", directText);
 }
示例#15
0
  /**
   * Prepending a row to the table body must place it before the existing row and leave every
   * row's sibling index equal to its position.
   */
  @Test
  public void testPrependRowToTable() {
    Document doc = Jsoup.parse("<table><tr><td>1</td></tr></table>");
    Element tableBody = doc.select("tbody").first();
    tableBody.prepend("<tr><td>2</td></tr>");

    String bodyHtml = TextUtil.stripNewlines(doc.body().html());
    assertEquals(
        "<table><tbody><tr><td>2</td></tr><tr><td>1</td></tr></tbody></table>", bodyHtml);

    // check sibling index (reindexChildren):
    Elements rows = doc.select("tr");
    for (int position = 0; position < rows.size(); position++) {
      Element row = rows.get(position);
      assertEquals(position, row.siblingIndex);
    }
  }
示例#16
0
  /**
   * A cloned element must start with the same class names as the original, and later changes to
   * the clone's class-name set must not affect the set obtained from the original.
   */
  @Test
  public void testClonesClassnames() {
    Document doc = Jsoup.parse("<div class='one two'></div>");
    Element original = doc.select("div").first();
    Set<String> originalClasses = original.classNames();
    assertEquals(2, originalClasses.size());
    assertTrue(originalClasses.contains("one"));
    assertTrue(originalClasses.contains("two"));

    Element duplicate = original.clone();
    Set<String> duplicateClasses = duplicate.classNames();
    assertEquals(2, duplicateClasses.size());
    assertTrue(duplicateClasses.contains("one"));
    assertTrue(duplicateClasses.contains("two"));

    // Diverge the clone's set from the original's.
    duplicateClasses.add("three");
    duplicateClasses.remove("one");

    assertTrue(originalClasses.contains("one"));
    assertFalse(originalClasses.contains("three"));
    assertFalse(duplicateClasses.contains("one"));
    assertTrue(duplicateClasses.contains("three"));

    // Neither element has any content.
    assertEquals("", original.html());
    assertEquals("", duplicate.html());
  }
示例#17
0
  /**
   * A clone must be parentless with sibling index 0, must be editable without touching the
   * original document, and can then be adopted into the document by appending it.
   */
  @Test
  public void testClone() {
    Document doc = Jsoup.parse("<div><p>One<p><span>Two</div>");

    Element secondParagraph = doc.select("p").get(1);
    Element copy = secondParagraph.clone();

    assertNull(copy.parent()); // should be orphaned
    assertEquals(0, copy.siblingIndex);
    assertEquals(1, secondParagraph.siblingIndex);
    assertNotNull(secondParagraph.parent());

    copy.append("<span>Three");
    String copyHtml = TextUtil.stripNewlines(copy.outerHtml());
    assertEquals("<p><span>Two</span><span>Three</span></p>", copyHtml);
    String untouchedBody = TextUtil.stripNewlines(doc.body().html());
    assertEquals("<div><p>One</p><p><span>Two</span></p></div>", untouchedBody); // not modified

    doc.body().appendChild(copy); // adopt
    assertNotNull(copy.parent());
    String bodyAfterAdopt = TextUtil.stripNewlines(doc.body().html());
    assertEquals(
        "<div><p>One</p><p><span>Two</span></p></div><p><span>Two</span><span>Three</span></p>",
        bodyAfterAdopt);
  }
示例#18
0
文件: Worker.java 项目: m1/Parker
  /**
   * Fetches {@code url} and, for every anchor with a non-empty absolute href, increments that
   * link's host counter in {@code tempStats} and adds the parsed URL to {@code tempQueue}.
   *
   * @param url page to download and scan
   * @param verbose when true, prints the current thread name, the queue size and {@code url}
   * @throws Exception whatever the fetch or the URL parsing throws
   */
  public Worker(String url, boolean verbose) throws Exception {
    Document page = Jsoup.connect(url).get();

    // Only anchors that carry an href attribute are considered.
    for (Element anchor : page.select("a[href]")) {
      // The "abs:" prefix requests the absolute form of the link.
      String absoluteHref = anchor.attr("abs:href");
      if (absoluteHref.isEmpty()) {
        continue;
      }

      Parser parsed = new Parser(absoluteHref);
      String linkHost = parsed.getHost();

      // Count one more link for this host, starting at 1 for an unseen host.
      if (tempStats.containsKey(linkHost)) {
        int seenSoFar = tempStats.get(linkHost);
        tempStats.put(linkHost, seenSoFar + 1);
      } else {
        tempStats.put(linkHost, 1);
      }

      tempQueue.add(parsed.getURL());
    }

    if (verbose) {
      String threadName = Thread.currentThread().getName();
      System.out.println(threadName + " : " + tempQueue.size() + " links from " + url);
    }
  }
示例#19
0
 /**
  * Extracts the city names of a job posting.
  *
  * <p>First looks inside {@code div#divIlanDetay} for a line containing one of the location
  * labels, takes the text after the first colon up to the first space, and splits it on commas.
  * If that yields nothing, falls back to scanning the tokens of the whole document text for the
  * known city names.
  *
  * @param doc parsed job-posting page
  * @return the cities found, possibly empty; never {@code null}
  */
 public List<String> extractCities(Document doc) {
   // Known cities used by the fallback scan.
   HashMap<String, String> cityMap = new HashMap<String, String>();
   cityMap.put("Adana", "Adana");
   cityMap.put("Konya", "Konya");
   cityMap.put("Tekirda\u011f", "Tekirda\u011f");

   List<String> cityList = new ArrayList<String>();
   Element ilanDetay = doc.select("div#divIlanDetay").first();
   // The detail div may be missing; in that case go straight to the fallback scan.
   if (ilanDetay != null) {
     String patternJobTitle = ".*(\u015eehir/\u00dclke|City/Country|Location).*";
     Pattern pattern = Pattern.compile(patternJobTitle);
     Matcher matcher = pattern.matcher(getPlainText(ilanDetay));
     if (matcher.find()) {
       String cityLine = matcher.group();
       String[] cityLineArr = cityLine.split(":");
       if (cityLineArr.length > 1) {
         String cityCommaStr = cityLineArr[1].trim();
         // Only the first space-delimited chunk after the colon is treated as the city list.
         String[] cityArr = cityCommaStr.split(" ")[0].split(",");
         for (String city : cityArr) {
           cityList.add(trim(city));
         }
       }
     }
   }
   if (cityList.isEmpty()) {
     Set<String> tokenSet = tokenize(doc.text());
     for (String s : tokenSet) {
       if (cityMap.containsKey(s)) {
         cityList.add(trim(cityMap.get(s)));
       }
     }
   }
   return cityList;
 }
示例#20
0
  /**
   * Builds a {@code JobData} from a job-posting page.
   *
   * <p>The company name and the job title are the first and second comma-separated entries of the
   * {@code keywords} meta tag's content. The other fields come from the helper extractors.
   *
   * @param doc parsed job-posting page
   * @return the populated job data, or {@code null} if constructing it throws (the page URI and
   *     the exception message are then written to standard error)
   */
  public JobData parse(Document doc) {
    // NOTE(review): assumes the keywords meta tag exists and holds at least two entries.
    String[] keywordArr = doc.select("meta[name=keywords]").first().attr("content").split(",");

    String uniqueId = extractUniqueId(doc);
    String companyName = keywordArr[0];
    String jobTitle = keywordArr[1];
    List<String> cityList = extractCities(doc);
    Date postedOn = extractDate(doc);
    String summaryText = summary(doc);

    try {
      return new JobData(
          trim(SOURCE),
          trim(uniqueId),
          trim(companyName),
          trim(jobTitle),
          cityList,
          trim(summaryText),
          postedOn,
          trim(doc.baseUri()),
          "");
    } catch (Exception ex) {
      System.err.println(doc.baseUri() + "  , " + ex.getMessage());
      return null;
    }
  }
示例#21
0
  /**
   * Checks that {@code classNames()} returns the element's classes in order, that modifying the
   * returned set does not change the element, and that {@code classNames(Set)} replaces the
   * element's classes.
   */
  @Test
  public void testClassNames() {
    Document doc = Jsoup.parse("<div class=\"c1 c2\">C</div>");
    Element div = doc.select("div").get(0);

    assertEquals("c1 c2", div.className());

    final Set<String> set1 = div.classNames();
    final Object[] arr1 = set1.toArray();
    // assertEquals rather than assertTrue(x == n) so that a failure reports the actual size.
    assertEquals(2, arr1.length);
    assertEquals("c1", arr1[0]);
    assertEquals("c2", arr1[1]);

    // Changes to the set should not be reflected in the Elements getters
    set1.add("c3");
    assertEquals(2, div.classNames().size());
    assertEquals("c1 c2", div.className());

    // Update the class names to a fresh set
    final Set<String> newSet = new LinkedHashSet<String>(3);
    newSet.addAll(set1);
    newSet.add("c3");

    div.classNames(newSet);

    assertEquals("c1 c2 c3", div.className());

    final Set<String> set2 = div.classNames();
    final Object[] arr2 = set2.toArray();
    assertEquals(3, arr2.length);
    assertEquals("c1", arr2[0]);
    assertEquals("c2", arr2[1]);
    assertEquals("c3", arr2[2]);
  }
示例#22
0
  /**
   * {@code insertChildren} must reject an index past the end, an index too far before the start,
   * and a null collection with an {@code IllegalArgumentException}.
   */
  @Test
  public void insertChildrenArgumentValidation() {
    Document doc = Jsoup.parse("<div id=1>Text <p>One</p> Text <p>Two</p></div><div id=2></div>");
    Elements divs = doc.select("div");
    Element source = divs.get(0);
    Element target = divs.get(1);
    List<Node> nodes = source.childNodes();

    // Index beyond the end of the target's children.
    try {
      target.insertChildren(6, nodes);
      fail();
    } catch (IllegalArgumentException expected) {
      // the exception is the outcome under test
    }

    // Negative index reaching before the start.
    try {
      target.insertChildren(-5, nodes);
      fail();
    } catch (IllegalArgumentException expected) {
      // the exception is the outcome under test
    }

    // Null collection.
    try {
      target.insertChildren(0, null);
      fail();
    } catch (IllegalArgumentException expected) {
      // the exception is the outcome under test
    }
  }
示例#23
0
 /**
  * Paragraphs with identical content must still report distinct element sibling indexes that
  * match their order in the parent.
  */
 @Test
 public void testElementSiblingIndexSameContent() {
   Document doc = Jsoup.parse("<div><p>One</p>...<p>One</p>...<p>One</p>");
   Elements paragraphs = doc.select("p");
   Element first = paragraphs.get(0);
   Element second = paragraphs.get(1);
   Element third = paragraphs.get(2);

   assertTrue(0 == first.elementSiblingIndex());
   assertTrue(1 == second.elementSiblingIndex());
   assertTrue(2 == third.elementSiblingIndex());
 }
示例#24
0
  @Test
  public void testBrHasSpace() {
    Document doc = Jsoup.parse("<p>Hello<br>there</p>");
    assertEquals("Hello there", doc.text());
    assertEquals("Hello there", doc.select("p").first().ownText());

    doc = Jsoup.parse("<p>Hello <br> there</p>");
    assertEquals("Hello there", doc.text());
  }
示例#25
0
  /**
   * Returns the text of the first element with id {@code divIlanDetay}, or an empty string when
   * the document has no such element.
   */
  private String summary(Document doc) {
    Element detail = doc.select("#divIlanDetay").first();
    if (detail == null) {
      return "";
    }
    return detail.text();
  }
示例#26
0
 /**
  * A selector on an attribute name containing a dash must match only the meta element carrying
  * that attribute value, not the other meta and not the div with the same attribute.
  */
 @Test
 public void testGetElementsWithAttributeDash() {
   String html =
       "<meta http-equiv=content-type value=utf8 id=1> <meta name=foo content=bar id=2> <div http-equiv=content-type value=utf8 id=3>";
   Document doc = Jsoup.parse(html);

   Elements matches = doc.select("meta[http-equiv=content-type], meta[charset]");
   assertEquals(1, matches.size());
   assertEquals("1", matches.first().id());
 }
示例#27
0
 /**
  * Wrapping with HTML that has a trailing sibling after the wrapper must place that remainder
  * inside the wrapper, after the wrapped element.
  */
 @Test
 public void testWrapWithRemainder() {
   Document doc = Jsoup.parse("<div><p>Hello</p></div>");
   Element paragraph = doc.select("p").first();
   paragraph.wrap("<div class='head'></div><p>There!</p>");

   String bodyHtml = TextUtil.stripNewlines(doc.body().html());
   assertEquals("<div><div class=\"head\"><p>Hello</p><p>There!</p></div></div>", bodyHtml);
 }
示例#28
0
  /** {@code toString()} must give the same markup before and after the element is removed. */
  @Test
  public void parentlessToString() {
    Document doc = Jsoup.parse("<img src='foo'>");
    Element image = doc.select("img").first();
    String whileAttached = image.toString();
    assertEquals("<img src=\"foo\">", whileAttached);

    image.remove(); // lost its parent
    String whileDetached = image.toString();
    assertEquals("<img src=\"foo\">", whileDetached);
  }
示例#29
0
 /**
  * A tag written as {@code ns:tag} in HTML must be selectable as {@code ns|tag}, and its
  * generated CSS selector must use the same {@code ns|tag} form.
  */
 @Test
 public void testNamespacedElements() {
   // Namespaces with ns:tag in HTML must be translated to ns|tag in CSS.
   Document doc = Jsoup.parse("<html><body><fb:comments /></body></html>", "http://example.com/bar/");
   Elements matches = doc.select("fb|comments");

   assertEquals(1, matches.size());
   String selector = matches.get(0).cssSelector();
   assertEquals("html > body > fb|comments", selector);
 }
示例#30
0
  /**
   * {@code dataNodes()} must return the single data node of a script and of a style element with
   * its content intact, and nothing for a paragraph.
   */
  @Test
  public void testGetDataNodes() {
    Document doc = Jsoup.parse("<script>One Two</script> <style>Three Four</style> <p>Fix Six</p>");
    Element scriptEl = doc.select("script").first();
    Element styleEl = doc.select("style").first();
    Element paragraph = doc.select("p").first();

    List<DataNode> fromScript = scriptEl.dataNodes();
    assertEquals(1, fromScript.size());
    assertEquals("One Two", fromScript.get(0).getWholeData());

    List<DataNode> fromStyle = styleEl.dataNodes();
    assertEquals(1, fromStyle.size());
    assertEquals("Three Four", fromStyle.get(0).getWholeData());

    // Paragraph content is text, not data.
    List<DataNode> fromParagraph = paragraph.dataNodes();
    assertEquals(0, fromParagraph.size());
  }