Beispiel #1
0
  /**
   * Checks the round trip between the {@code class} attribute and {@code classNames()}: the
   * returned set lists the names in attribute order, editing that set does not change the
   * element, and {@code classNames(Set)} writes a new set back to the attribute.
   */
  @Test
  public void testClassNames() {
    Document doc = Jsoup.parse("<div class=\"c1 c2\">C</div>");
    Element div = doc.select("div").get(0);

    assertEquals("c1 c2", div.className());

    final Set<String> set1 = div.classNames();
    final Object[] arr1 = set1.toArray();
    // assertEquals (rather than assertTrue on ==) reports the actual length on failure
    assertEquals(2, arr1.length);
    assertEquals("c1", arr1[0]);
    assertEquals("c2", arr1[1]);

    // Changes to the set should not be reflected in the Elements getters
    set1.add("c3");
    assertEquals(2, div.classNames().size());
    assertEquals("c1 c2", div.className());

    // Update the class names to a fresh set
    final Set<String> newSet = new LinkedHashSet<String>(3);
    newSet.addAll(set1);
    newSet.add("c3");

    div.classNames(newSet);

    assertEquals("c1 c2 c3", div.className());

    final Set<String> set2 = div.classNames();
    final Object[] arr2 = set2.toArray();
    assertEquals(3, arr2.length);
    assertEquals("c1", arr2[0]);
    assertEquals("c2", arr2[1]);
    assertEquals("c3", arr2[2]);
  }
Beispiel #2
0
  /**
   * Checks that a cloned element hands out its own class-name set: edits made to the set
   * obtained from the clone must not appear in the set obtained from the original element.
   */
  @Test
  public void testClonesClassnames() {
    final Element original = Jsoup.parse("<div class='one two'></div>").select("div").first();
    final Set<String> originalClasses = original.classNames();
    assertEquals(2, originalClasses.size());
    assertTrue(originalClasses.contains("one"));
    assertTrue(originalClasses.contains("two"));

    final Element duplicate = original.clone();
    final Set<String> duplicateClasses = duplicate.classNames();
    assertEquals(2, duplicateClasses.size());
    assertTrue(duplicateClasses.contains("one"));
    assertTrue(duplicateClasses.contains("two"));

    // Edit only the set that came from the clone.
    duplicateClasses.add("three");
    duplicateClasses.remove("one");

    // The original's set keeps its contents; the clone's set shows the edits.
    assertTrue(originalClasses.contains("one"));
    assertFalse(originalClasses.contains("three"));
    assertFalse(duplicateClasses.contains("one"));
    assertTrue(duplicateClasses.contains("three"));

    // Both divs are empty, so neither has inner HTML.
    assertEquals("", original.html());
    assertEquals("", duplicate.html());
  }
 /**
  * Extracts the author text from the given element.
  *
  * <p>If the element's class names contain {@code m-hero__slot}, the text of the first element
  * returned by {@code getElementsByClass("byline")} is used; if they contain
  * {@code m-entry-slot}, the first element returned by {@code getElementsByClass("author")} is
  * used.
  *
  * @param element the element to inspect; may be {@code null}
  * @return the author text, or {@code "Unknown author"} when the element is {@code null}, has
  *     neither of the two class names, or contains no matching author element
  */
 String parseAuthor(Element element) {
   final String fallback = "Unknown author";
   if (element == null) {
     return fallback;
   }

   // Pick the element holding the author; stays null when the layout is not recognised.
   Element authorHolder = null;
   if (element.classNames().contains("m-hero__slot")) {
     authorHolder = element.getElementsByClass("byline").first();
   } else if (element.classNames().contains("m-entry-slot")) {
     authorHolder = element.getElementsByClass("author").first();
   }

   return authorHolder != null ? authorHolder.text() : fallback;
 }
 /**
  * Extracts the article link ({@code href} attribute) from the given element.
  *
  * <p>If the element's class names contain {@code m-hero__slot}, the link is read from the first
  * element with class {@code m-hero__slot-link}; if they contain {@code m-entry-slot}, it is
  * read from the first {@code a} inside the first {@code h3}.
  *
  * @param element the element to inspect; may be {@code null}
  * @return the value of the {@code href} attribute, or {@code null} when the element is
  *     {@code null}, has neither of the two class names, or lacks the expected link element
  */
 String parseArticleLink(Element element) {
   if (element == null) {
     return null;
   }

   // Locate the anchor; stays null when the layout is not recognised or a node is missing.
   Element anchor = null;
   if (element.classNames().contains("m-hero__slot")) {
     anchor = element.getElementsByClass("m-hero__slot-link").first();
   } else if (element.classNames().contains("m-entry-slot")) {
     final Element heading = element.getElementsByTag("h3").first();
     if (heading != null) {
       anchor = heading.getElementsByTag("a").first();
     }
   }

   return anchor != null ? anchor.attr("href") : null;
 }
 /**
  * Extracts the title text from the given element.
  *
  * <p>If the element's class names contain {@code m-hero__slot}, the title is the text of the
  * first {@code h2} inside the first element with class {@code m-hero__slot-link}; if they
  * contain {@code m-entry-slot}, it is the text of the first {@code h3}.
  *
  * @param element the element to inspect; may be {@code null}
  * @return the title text, or {@code "Unknown title"} when the element is {@code null}, has
  *     neither of the two class names, or lacks the expected heading element
  */
 String parseTitle(Element element) {
   final String fallback = "Unknown title";
   if (element == null) {
     return fallback;
   }

   // Locate the heading; stays null when the layout is not recognised or a node is missing.
   Element heading = null;
   if (element.classNames().contains("m-hero__slot")) {
     final Element link = element.getElementsByClass("m-hero__slot-link").first();
     if (link != null) {
       heading = link.getElementsByTag("h2").first();
     }
   } else if (element.classNames().contains("m-entry-slot")) {
     heading = element.getElementsByTag("h3").first();
   }

   return heading != null ? heading.text() : fallback;
 }
  /**
   * Scans the elements matched by {@code div.cc > p > *} and adds every {@code a} element as a
   * similar recommendation to the result builder.
   *
   * <p>A {@code span} with class {@code flagicon} switches the language applied to the links
   * that follow it: its remaining class names are tried in order as language ids and the first
   * one that resolves wins. If none resolves, the previous language stays in effect.
   *
   * @param doc the document to scan
   * @param resultBuilder the builder receiving the recommendations
   */
  private void findRecommendations(
      @NotNull Document doc, @NotNull BilingualQueryResultBuilder resultBuilder) {
    Language activeLanguage = null;

    for (Element candidate : doc.select("div.cc > p > *")) {
      final String tag = candidate.tagName();

      if (tag.equals("span") && candidate.hasClass("flagicon")) {
        // Flag icon: every class other than the marker itself is a possible language id.
        Set<String> languageHints = candidate.classNames();
        languageHints.remove("flagicon");
        for (String hint : languageHints) {
          Language resolved = Language.getExistingLanguageById(hint);
          if (resolved != null) {
            activeLanguage = resolved;
            break;
          }
        }
        continue;
      }

      if (tag.equals("a")) {
        // Link: its text becomes the general form, tagged with the language seen last.
        String linkText = candidate.text();
        DictionaryObjectBuilder entry = ImmutableDictionaryObject.builder();
        entry.setLanguage(activeLanguage).setGeneralForm(linkText);
        resultBuilder.addSimilarRecommendation(entry.build());
      }
    }
  }
 /**
  * Extracts the image URL (the {@code data-original} attribute) from the given element.
  *
  * <p>If the element's class names contain {@code m-hero__slot}, the attribute is looked up
  * inside the first element with class {@code m-hero__slot-link}; if they contain
  * {@code m-entry-slot}, it is looked up on the element itself via
  * {@code getElementsByAttribute}.
  *
  * @param element the element to inspect; may be {@code null}
  * @return the value of {@code data-original}, or the URL of a placeholder picture when the
  *     element is {@code null}, has neither of the two class names, or lacks the expected node
  */
 String parseImageURL(Element element) {
   // Fallback: an image that says "No image"
   //noinspection SpellCheckingInspection
   final String fallback =
       "http://best-classic-cars.com/images/no_image_available.png.pagespeed.ce.NRX39FjzIc.png";
   if (element == null) {
     return fallback;
   }

   // Locate the node carrying data-original; stays null when nothing suitable is found.
   Element imageHolder = null;
   if (element.classNames().contains("m-hero__slot")) {
     final Element link = element.getElementsByClass("m-hero__slot-link").first();
     if (link != null) {
       imageHolder = link.getElementsByAttribute("data-original").first();
     }
   } else if (element.classNames().contains("m-entry-slot")) {
     imageHolder = element.getElementsByAttribute("data-original").first();
   }

   return imageHolder != null ? imageHolder.attr("data-original") : fallback;
 }
Beispiel #8
0
  /**
   * Drops a class name from this element's {@code class} attribute. The current class names are
   * read, the given name is removed from that set, and the set is written back.
   *
   * @param className the class name to drop
   * @return this element
   */
  public Element removeClass(String className) {
    Validate.notNull(className);

    final Set<String> remaining = classNames();
    remaining.remove(className);
    classNames(remaining);

    return this;
  }
Beispiel #9
0
  /**
   * Flips the presence of a class name in this element's {@code class} attribute: a name that is
   * currently present is removed, a name that is absent is added.
   *
   * @param className the class name to flip
   * @return this element
   */
  public Element toggleClass(String className) {
    Validate.notNull(className);

    final Set<String> updated = classNames();
    // Set.remove reports whether the name was present; only add it when it was not.
    if (!updated.remove(className)) {
      updated.add(className);
    }
    classNames(updated);

    return this;
  }
  /**
   * Walks {@code element} and its children, storing data keyed by CSS class name in
   * {@code values}. Elements whose class-name set is empty are ignored.
   *
   * <ul>
   *   <li>A block element at depth 0 stores its class names under {@code "type"} and its
   *       children are collected into the same map.
   *   <li>A block element at any other depth gets its own nested map, stored under its first
   *       class name, with the element's content (if any) under {@code "name"}.
   *   <li>An inline element stores its content under each of its class names; a {@code url}
   *       class with a non-blank {@code href} is stored as {@code "url"} instead.
   * </ul>
   *
   * @param element the element to process
   * @param values the map receiving the extracted values
   * @param depth nesting depth of {@code element}; 0 for the element the walk started at
   */
  private void recurse(final Element element, final Map<String, Object> values, final int depth) {

    final Tag tag = element.tag();
    final Set<String> classNames = element.classNames();
    final String href = element.attr("href");
    final Object content = extractChildContent(element);

    if (classNames.isEmpty()) {
      return;
    }

    removeEmpty(classNames);

    if (tag.isBlock()) {

      // The starting element writes into the caller's map; nested blocks get a map of their own.
      final Map<String, Object> target;

      if (depth == 0) {

        // toplevel classes define type
        values.put("type", classNames);
        target = values;

      } else {

        target = new LinkedHashMap<>();
        // NOTE(review): if removeEmpty() can leave the set empty, next() throws here - confirm.
        values.put(classNames.iterator().next(), target);

        if (content != null) {
          target.put("name", content);
        }
      }

      for (final Element child : element.children()) {
        recurse(child, target, depth + 1);
      }

      return;
    }

    if (tag.isInline()) {

      // extract href and store as URL; the class is consumed so it is not reused as a key below
      if (classNames.contains("url") && StringUtils.isNotBlank(href)) {

        values.put("url", href);
        classNames.remove("url");
      }

      if (content != null) {

        for (final String key : classNames) {
          values.put(key, content);
        }
      }
    }
  }
Beispiel #11
0
 /**
  * Fills {@code item} with the data found on a detail page.
  *
  * <p>If the page contains {@code select#SeasonSelection}, the item is marked as a series: the
  * series id is taken from the text after {@code SeriesID=} in the select's {@code rel}
  * attribute, and one {@code Season} is built per {@code option} (id from the option value, name
  * from its text, episodes from its comma-separated {@code rel} attribute).
  *
  * <p>Otherwise the item is marked as a movie and its mirrors are built from the {@code li}
  * entries of {@code ul#HosterList}: the hoster id comes from a {@code MirStyle<id>} class name
  * and the number of mirrors from the part after the {@code /} in the {@code div.Data} text
  * (1 when there is no {@code /}). Only hosts reporting {@code isEnabled()} are kept.
  *
  * <p>In both cases the IMDb id is set from the {@code href} of {@code div.IMDBRatingLinks > a},
  * with all slashes removed, when that attribute is not empty.
  *
  * @param doc the parsed detail page
  * @param item the view model to fill
  * @return the same {@code item} instance
  * @throws NumberFormatException if an id or mirror count on the page is not a valid integer
  */
 private ViewModel parseDetail(Document doc, ViewModel item) {
   // Run the selector once and reuse the result for both the check and the attribute lookup.
   Elements seasonSelection = doc.select("select#SeasonSelection");
   if (!seasonSelection.isEmpty()) {
     item.setType(ViewModel.Type.SERIES);
     String rel = seasonSelection.attr("rel");
     rel = rel.substring(rel.indexOf("SeriesID=") + "SeriesID=".length());
     item.setSeriesID(Integer.parseInt(rel));
     // Fill seasons and episodes
     Elements seasons = doc.select("select#SeasonSelection > option");
     List<Season> list = new ArrayList<Season>();
     for (Element season : seasons) {
       String[] rels = season.attr("rel").split(",");
       Season s = new Season();
       s.id = Integer.parseInt(season.val());
       s.name = season.text();
       s.episodes = rels;
       list.add(s);
     }
     item.setSeasons(list.toArray(new Season[list.size()]));
   } else {
     item.setType(ViewModel.Type.MOVIE);
     List<Host> hostlist = new ArrayList<Host>();
     Elements hosts = doc.select("ul#HosterList").select("li");
     for (Element host : hosts) {
       // Stays 0 when no MirStyle class is present; the last MirStyle class wins otherwise.
       int hosterId = 0;
       Set<String> classes = host.classNames();
       for (String className : classes) {
         if (className.startsWith("MirStyle")) {
           hosterId = Integer.parseInt(className.substring("MirStyle".length()));
         }
       }
       String name = host.select("div.Named").text();
       String count = host.select("div.Data").text();
       int mirrorCount = 1;
       int slash = count.indexOf('/');
       if (slash >= 0) {
         // The count runs up to the next space; if the text ends right after the number there
         // is no space, so fall back to the end of the string instead of passing -1 to substring.
         int end = count.indexOf(' ', slash);
         if (end < 0) {
           end = count.length();
         }
         mirrorCount = Integer.parseInt(count.substring(slash + 1, end));
       }
       for (int i = 0; i < mirrorCount; i++) {
         Host h = Host.selectById(hosterId);
         h.setName(name);
         h.setMirror(i + 1);
         if (h.isEnabled()) {
           hostlist.add(h);
         }
       }
     }
     item.setMirrors(hostlist.toArray(new Host[hostlist.size()]));
   }
   String imdb = doc.select("div.IMDBRatingLinks > a").attr("href").trim();
   if (!TextUtils.isEmpty(imdb)) {
     imdb = imdb.replace("/", "");
     item.setImdbId(imdb);
   }
   return item;
 }
Beispiel #12
0
  /**
   * Exercises {@code className()}, {@code hasClass()} and {@code classNames()} on an element
   * whose {@code class} attribute has surrounding whitespace, and on the document itself, which
   * has no class at all.
   */
  @Test
  public void testClassDomMethods() {
    final String html = "<div><span class=' mellow yellow '>Hello <b>Yellow</b></span></div>";
    final Document doc = Jsoup.parse(html);
    final Element span = doc.getElementsByAttribute("class").get(0);

    // Surrounding whitespace in the attribute is not part of the reported class name(s).
    assertEquals("mellow yellow", span.className());
    assertTrue(span.hasClass("mellow"));
    assertTrue(span.hasClass("yellow"));

    final Set<String> spanClasses = span.classNames();
    assertEquals(2, spanClasses.size());
    assertTrue(spanClasses.contains("mellow"));
    assertTrue(spanClasses.contains("yellow"));

    // The document carries no class attribute.
    assertEquals("", doc.className());
    final Set<String> docClasses = doc.classNames();
    assertEquals(0, docClasses.size());
    assertFalse(doc.hasClass("mellow"));
  }
  /**
   * Reads the raw text values of a stock detail page and stores them, unparsed, in the
   * {@code ...Str} variables declared outside this method (presumably instance fields).
   *
   * <p>The method works through four areas of the document in turn:
   *
   * <ol>
   *   <li>the info element ({@code CSS_QUERY_TO_FIND_STOCKS_INFO}): stock code and sector;
   *   <li>the stock table ({@code CSS_QUERY_TO_FIND_STOCKS_TABLE}): stock name, price change
   *       against the previous day, and the real-time price;
   *   <li>the spans selected by {@code CSS_QUERY_IN_DETAIL_PAGE_TO_FIND_SPAN_UNDER_ID_DEAL}: the
   *       market name;
   *   <li>the {@code dl} lists selected by {@code CSS_QUERY_IN_DETAIL_PAGE_TO_FIND_ALL_DL}: one
   *       value per recognised caption.
   * </ol>
   *
   * @param doc the parsed detail page
   * @throws FailedToFindElementException if the stock info element or the stock table element is
   *     not found in the document
   */
  private void extractDataAsString(Document doc) throws FailedToFindElementException {
    // --- 1. Info element: stock code and sector ---
    Elements infoElements = doc.select(CSS_QUERY_TO_FIND_STOCKS_INFO);
    if (infoElements == null || infoElements.size() < 1) {
      throw new FailedToFindElementException("Cannot find stock info element.");
    }
    Element info = infoElements.get(0);
    if (info != null) {
      Elements dts = info.select("dt");
      if (dts != null) {
        stockCodeStr = dts.text().trim();
      }
      Elements category = info.select(".category");
      if (category != null) {
        sectorStr = category.text().trim();
      }
    }

    // --- 2. Stock table: name, change against previous day, real-time price ---
    Elements stocksTables = doc.select(CSS_QUERY_TO_FIND_STOCKS_TABLE);
    if (stocksTables == null || stocksTables.size() < 1) {
      throw new FailedToFindElementException("Cannot find stock table element.");
    }
    Element stocksTable = stocksTables.get(0);

    Elements symbol = stocksTable.select(".symbol");
    if (symbol != null) {
      stockNameStr = symbol.text().trim();
    }

    Elements tds = stocksTable.select("td");
    for (Element td : tds) {
      String text = Util.normalizeRoundParentheses(td.text().trim());
      if (text.length() == 0) {
        // empty cell: nothing to record
      } else if (td.classNames().contains("change")) {
        priceComparisonWithPreviousDayStr = text;
      } else {
        // Any other cell counts as the real-time price only if it parses as a number once
        // commas and non-breaking spaces are removed; the last such cell wins.
        try {
          Double.parseDouble(Util.removeCommaAndNbsp(text));
        } catch (NumberFormatException e) {
          continue;
        }
        realtimePriceStr = text;
      }
    }

    // --- 3. Market name ---
    Elements spans = doc.select(CSS_QUERY_IN_DETAIL_PAGE_TO_FIND_SPAN_UNDER_ID_DEAL);

    // Market names expected here (each contains one of the characters tested below):
    // マザーズ,札証,札幌ア,東証,東証1部, 東証2部,東証JQG,東証JQS,東証外国,福岡Q, 福証
    for (Element span : spans) {
      String s = span.text().trim();
      if (s.length() > 0
          && (s.indexOf("マ") >= 0
              || s.indexOf("札") >= 0
              || s.indexOf("東") >= 0
              || s.indexOf("福") >= 0)) {
        // Keep only the part before an opening parenthesis, if there is one.
        int index = s.indexOf('(');
        if (index >= 0) {
          s = s.substring(0, index);
        }
        marketStr = s;
        break;
      }
    }
    // Fallback when no market has been set: take the first non-empty span that does not start
    // with "(" and is neither "PTS" nor "OTC".
    // NOTE(review): marketStr is not reset in this method, so a value left over from an earlier
    // call would skip this fallback - confirm that callers use a fresh instance.
    if (marketStr == null) {
      for (Element span : spans) {
        String s = span.text().trim();
        if (s.length() > 0 && !s.startsWith("(") && !s.equals("PTS") && !s.equals("OTC")) {
          int index = s.indexOf('(');
          if (index >= 0) {
            s = s.substring(0, index);
          }
          marketStr = s;
          break;
        }
      }
    }

    // --- 4. Caption/value lists ---
    // isDebt / isSelling remember which margin balance caption was seen most recently, so that a
    // following "ratio compared with previous week" caption is stored for the right balance.
    boolean isDebt = false;
    boolean isSelling = false;
    Elements dls = doc.select(CSS_QUERY_IN_DETAIL_PAGE_TO_FIND_ALL_DL);
    for (Element dl : dls) {
      Elements dt = dl.getElementsByTag("dt");
      Elements dd = dl.getElementsByTag("dd");
      String caption = dt.text().trim();
      String value = Util.normalizeRoundParentheses(dd.text().trim());

      // Captions are matched by prefix, and the first matching branch wins, so the order of the
      // branches matters if one caption constant is a prefix of another.
      if (caption.startsWith(CAPTION_PREVIOUS_CLOSING_PRICE)) {
        previousClosingPriceStr = value;
      } else if (caption.startsWith(CAPTION_OPENING_PRICE)) {
        openingPriceStr = value;
      } else if (caption.startsWith(CAPTION_HIGH_PRICE)) {
        highPriceStr = value;
      } else if (caption.startsWith(CAPTION_LOW_PRICE)) {
        lowPriceStr = value;
      } else if (caption.startsWith(CAPTION_TRADING_VOLUME_OF_STOCKS)) {
        tradingVolumeOfStocksStr = value;
      } else if (caption.startsWith(CAPTION_TRADING_VALUE_OF_MONEY)) {
        tradingValueOfMoneyStr = value;
      } else if (caption.startsWith(CAPTION_PRICE_LIMIT)) {
        priceLimitStr = value;
      } else if (caption.startsWith(CAPTION_MARKET_CAPITALIZATION)) {
        marketCapitalizationStr = value;
      } else if (caption.startsWith(CAPTION_OUTSTANDING_STOCK_VOLUME)) {
        outstandingStockVolumeStr = value;
      } else if (caption.startsWith(CAPTION_ANNUAL_INTEREST_RATE)) {
        annualInterestRateStr = value;
      } else if (caption.startsWith(CAPTION_DIVIDENDS_PER_SHARE)) {
        dividendsPerShareStr = value;
      } else if (caption.startsWith(CAPTION_PER)) {
        perStr = value;
      } else if (caption.startsWith(CAPTION_PBR)) {
        pbrStr = value;
      } else if (caption.startsWith(CAPTION_EPS)) {
        epsStr = value;
      } else if (caption.startsWith(CAPTION_BPS)) {
        bpsStr = value;
      } else if (caption.startsWith(CAPTION_MINIMUM_PURCHASE_AMOUNT)) {
        minimumPurchaseAmountStr = value;
      } else if (caption.startsWith(CAPTION_SHARE_UNIT_NUMBER)) {
        shareUnitNumberStr = value;
      } else if (caption.startsWith(CAPTION_YEARLY_HIGH)) {
        yearlyHighStr = value;
      } else if (caption.startsWith(CAPTION_YEARLY_LOW)) {
        yearlyLowStr = value;
      } else if (caption.startsWith(CAPTION_NET_ASSETS)) {
        netAssetsStr = value;
      } else if (caption.startsWith(CAPTION_UNIT_OF_TRADING)) {
        unitOfTradingStr = value;
      } else if (caption.startsWith(CAPTION_MANAGEMENT_COMPANY)) {
        managementCompanyStr = value;
      } else if (caption.startsWith(CAPTION_TYPE_OF_ASSETS_TO_BE_INVESTED)) {
        typeOfAssetsToBeInvestedStr = value;
      } else if (caption.startsWith(CAPTION_REGION_TO_BE_INVESTED)) {
        regionToBeInvestedStr = value;
      } else if (caption.startsWith(CAPTION_UNDERLYING_INDEX)) {
        underlyingIndexStr = value;
      } else if (caption.startsWith(CAPTION_SETTLEMENT_FREQUENCY)) {
        settlementFrequencyStr = value;
      } else if (caption.startsWith(CAPTION_SETTLEMENT_MONTH)) {
        settlementMonthStr = value;
      } else if (caption.startsWith(CAPTION_LISTED_DATE)) {
        listedDateStr = value;
      } else if (caption.startsWith(CAPTION_TRUST_FEE)) {
        trustFeeStr = value;
      } else if (caption.startsWith(CAPTION_MARGIN_DEBT_BALANCE)) {
        marginDebtBalanceStr = value;
        isDebt = true;
        isSelling = false;
      } else if (caption.startsWith(CAPTION_MARGIN_RATIO_COMPARISON_WITH_PREVIOUS_WEEK)) {
        // The same caption is used for both balances; store it for whichever came last.
        if (isDebt) {
          marginDebtBalanceRatioComparisonWithPreviousWeekStr = value;
        } else if (isSelling) {
          marginSellingBalanceRatioComparisonWithPreviousWeekStr = value;
        }
      } else if (caption.startsWith(CAPTION_MARGIN_SELLING_BALANCE)) {
        marginSellingBalanceStr = value;
        isDebt = false;
        isSelling = true;
      } else if (caption.startsWith(CAPTION_RATIO_OF_MARGIN_BALANCE)) {
        ratioOfMarginBalanceStr = value;
      } else {
        // Unrecognised caption: report it on stdout unless it is empty, starts with one of the
        // prefixes listed here, or its value contains the excluded text.
        if (!caption.equals("")
            && !caption.startsWith("値上がり率")
            && !caption.startsWith("値下がり率")
            && !caption.startsWith("[買い]")
            && !caption.startsWith("[売り]")
            && value.indexOf("リアルタイム株価") < 0) {

          //
          // TODO: unknown data format
          //
          System.out.println("unknown caption=" + caption);
          System.out.println("unknown value=" + value);
        }
      }
    }
  }
Beispiel #14
0
 /**
  * Creates a copy of this element via {@code super.clone()} and clears the copy's
  * {@code classNames} field so that it does not point at this element's class-name set.
  *
  * @return the cloned element
  */
 @Override
 public Element clone() {
   final Element copy = (Element) super.clone();
   // Reset the field: it is derived again on first use; keeping it would share the source's set.
   copy.classNames = null;
   return copy;
 }