Exemplo n.º 1
0
  /**
   * Checks that {@code insertChildren} resolves negative indexes from the end of the child list,
   * both when moving nodes that already have a parent and when adopting orphan nodes.
   */
  @Test
  public void insertChildrenAtPosition() {
    String html =
        "<div id=1>Text1 <p>One</p> Text2 <p>Two</p></div><div id=2>Text3 <p>Three</p></div>";
    Document doc = Jsoup.parse(html);
    Elements divs = doc.select("div");
    Element source = divs.get(0);
    Element target = divs.get(1);
    Elements movedParagraphs = source.select("p");

    // Index -1 appends: both paragraphs leave the first div and end up last in the second one.
    assertEquals(2, target.childNodeSize());
    target.insertChildren(-1, movedParagraphs);
    assertEquals(2, source.childNodeSize()); // moved two out
    assertEquals(4, target.childNodeSize());
    assertEquals(3, movedParagraphs.get(1).siblingIndex()); // should be last

    Element firstSpan = new Element(Tag.valueOf("span"), "").text("Span1");
    Element secondSpan = new Element(Tag.valueOf("span"), "").text("Span2");
    TextNode trailingText = new TextNode("Text4", "");
    List<Node> orphans = new ArrayList<Node>();
    orphans.add(firstSpan);
    orphans.add(secondSpan);
    orphans.add(trailingText);

    // Index -2 inserts the three orphans right before the current last child.
    assertNull(firstSpan.parent());
    target.insertChildren(-2, orphans);
    assertEquals(target, firstSpan.parent());
    assertEquals(7, target.childNodeSize());
    assertEquals(3, firstSpan.siblingIndex());
    assertEquals(4, secondSpan.siblingIndex());
    assertEquals(5, trailingText.siblingIndex());
  }
Exemplo n.º 2
0
  /**
   * Checks that a cloned element starts detached, can be modified without touching the source
   * document, and can afterwards be adopted into that document.
   */
  @Test
  public void testClone() {
    Document doc = Jsoup.parse("<div><p>One<p><span>Two</div>");
    Element original = doc.select("p").get(1);
    Element copy = original.clone();

    // The copy has no parent; the original keeps its position in the tree.
    assertNull(copy.parent()); // should be orphaned
    assertEquals(0, copy.siblingIndex);
    assertEquals(1, original.siblingIndex);
    assertNotNull(original.parent());

    // Appending to the copy must not leak into the document it was cloned from.
    copy.append("<span>Three");
    String copyHtml = TextUtil.stripNewlines(copy.outerHtml());
    assertEquals("<p><span>Two</span><span>Three</span></p>", copyHtml);
    String bodyBeforeAdoption = TextUtil.stripNewlines(doc.body().html());
    assertEquals("<div><p>One</p><p><span>Two</span></p></div>", bodyBeforeAdoption); // not modified

    // Appending the copy to the body gives it a parent and makes it part of the document.
    doc.body().appendChild(copy); // adopt
    assertNotNull(copy.parent());
    String bodyAfterAdoption = TextUtil.stripNewlines(doc.body().html());
    assertEquals(
        "<div><p>One</p><p><span>Two</span></p></div><p><span>Two</span><span>Three</span></p>",
        bodyAfterAdoption);
  }
Exemplo n.º 3
0
 /**
  * Reports whether whitespace has to be preserved for the given node.
  *
  * <p>Only the node itself and its direct parent are inspected; ancestors further up are
  * deliberately ignored to avoid recursion and needless walks up the tree.
  *
  * @param node node to check, may be {@code null}
  * @return {@code true} if the node is an element whose own tag or whose parent's tag preserves
  *     whitespace; {@code false} for {@code null} and for non-element nodes
  */
 private static boolean preserveWhitespace(Node node) {
   // instanceof is already false for null, so this also covers the null case
   if (!(node instanceof Element)) {
     return false;
   }
   Element element = (Element) node;
   if (element.tag().preserveWhitespace()) {
     return true;
   }
   Element parent = element.parent();
   return parent != null && parent.tag().preserveWhitespace();
 }
Exemplo n.º 4
0
  /**
   * Computes the average star rating shown in the given document.
   *
   * <p>For each star value from 1 to 5 the elements matching {@code
   * div.starsprite.short.star<N>} are collected; an element counts as one rating of N stars when
   * its parent carries the class {@code stars}.
   *
   * @param currentDoc parsed page to inspect
   * @return the average formatted with {@code "%.2f"} in the default locale; when no rating is
   *     found the division is 0.0/0 and the formatted result is {@code "NaN"}
   */
  public String getAVGwebsitesEvaluation(Document currentDoc) {
    int ratingCount = 0;
    int starSum = 0;

    for (int stars = 1; stars <= 5; stars++) {
      Elements sprites = currentDoc.select("div.starsprite.short.star" + stars);
      for (Element sprite : sprites) {
        if (sprite.parent().hasClass("stars")) {
          ratingCount++;
          starSum += stars;
        }
      }
    }

    // Multiply by 1.0 first so the division is done in floating point.
    double average = starSum * 1.0 / ratingCount;
    return String.format("%.2f", average);
  }
Exemplo n.º 5
0
  /**
   * Removes the containers of the {@code a#lemma-edit} and {@code div#collectBtn} elements from
   * the given HTML.
   *
   * @param pcont HTML to clean, may be {@code null}
   * @return the HTML of the re-parsed document with the parents of the matched elements removed,
   *     or an empty string when {@code pcont} is {@code null}
   */
  public String reviseContForBaiduBaike(String pcont) {
    if (pcont == null) {
      return "";
    }

    Document doc = Jsoup.parse(pcont);
    // Processed in this order; each selection runs on the document as left by the previous one.
    String[] markerSelectors = {"a#lemma-edit", "div#collectBtn"};
    for (String selector : markerSelectors) {
      for (Element marker : doc.select(selector)) {
        marker.parent().remove();
      }
    }
    return doc.html();
  }
Exemplo n.º 6
0
 /**
  * Appends the ancestors of {@code el} to {@code parents}, nearest ancestor first.
  *
  * <p>The walk ends when an element has no parent or when the parent's tag name is {@code
  * "#root"}; that root element itself is not added.
  *
  * @param el element whose ancestors are collected
  * @param parents collection receiving the ancestors
  */
 private static void accumulateParents(Element el, Elements parents) {
   Element ancestor = el.parent();
   while (ancestor != null && !ancestor.tagName().equals("#root")) {
     parents.add(ancestor);
     ancestor = ancestor.parent();
   }
 }
Exemplo n.º 7
0
  /**
   * Downloads the menu page for the given number and extracts its meals.
   *
   * <p>Each {@code td[width=400]} cell marks one meal row. Within the cell's parent, the first
   * {@code div[id=ssilka]} holds the description and the last one holds the cost, of which only
   * the part before the first {@code "-"} is parsed as an integer.
   *
   * @param number value substituted into the {@code URL} format string
   * @return the parsed meals; empty when the page cannot be fetched or contains no meal rows.
   *     Rows without a {@code div[id=ssilka]} or without a {@code "-"} in the cost are skipped.
   * @throws NumberFormatException if the text before the {@code "-"} is not an integer
   */
  public List<MenuMeal> getMenuMeals(int number) {
    List<MenuMeal> meals = new ArrayList<>();

    Document doc;
    try {
      doc =
          Jsoup.connect(String.format(URL, number))
              .userAgent("Chrome/49.0.2623.112")
              .referrer("https://www.google.ru/")
              .timeout(7000)
              .get();
    } catch (IOException e) {
      // Best effort: a failed download is reported and yields an empty menu.
      e.printStackTrace();
      return meals;
    }

    // The attribute selector needs its closing ']'; the unbalanced form is rejected by
    // jsoup versions that validate bracket balance.
    for (Element cell : doc.select("td[width=400]")) {
      Elements parts = cell.parent().select("div[id=ssilka]");
      if (parts.isEmpty()) {
        continue; // not a meal row
      }

      String cost = parts.last().text();
      int dash = cost.indexOf("-");
      if (dash < 0) {
        continue; // cost is not in the expected "<amount>-..." form
      }

      MenuMeal menuMeal = new MenuMeal();
      menuMeal.setDescription(parts.first().text());
      menuMeal.setCost(Integer.valueOf(cost.substring(0, dash)));
      meals.add(menuMeal);
    }
    return meals;
  }
 /**
  * Looks up the child node of the element's parent that sits at the element's sibling index.
  *
  * @param aInElement element to resolve; its parent is dereferenced without a null check
  * @return the node found at the element's sibling index
  * @throws RuntimeException if that node's name differs from the element's tag name
  */
 private static Node toNode(Element aInElement) {
   Node lSameSlot = aInElement.parent().childNode(aInElement.siblingIndex());
   if (lSameSlot.nodeName().equals(aInElement.tagName())) {
     return lSameSlot;
   }
   throw new RuntimeException(lSameSlot.nodeName() + " != " + aInElement.tagName());
 }
Exemplo n.º 9
0
  /**
   * Parses the document and records the statistic links for the given sport and competition.
   *
   * <p>Every link whose {@code href} contains {@code sport_name=<sport>} is inspected. A link
   * inside an {@code infoPageText2} parent whose text equals {@code competition} (ignoring case)
   * sets {@code linkToCompStat}, with non-breaking spaces and plain spaces removed. Otherwise a
   * link inside an {@code infoPageTitle2} parent sets {@code linkToSportStat}, with non-breaking
   * spaces removed. When several links qualify, the last one wins.
   *
   * @param doc document to parse
   * @param sport sport name, spliced into the selector as-is
   * @param competition competition name compared against the link text
   * @return the result of {@code parse(doc)}
   */
  public Statistic parse(Document doc, String sport, String competition) {
    Statistic stat = parse(doc);

    String linkSelector = "a[href*=sport_name=" + sport + "]";
    for (Element link : doc.select(linkSelector)) {
      Element holder = link.parent();
      if (holder.hasClass("infoPageText2") && competition.equalsIgnoreCase(link.text())) {
        linkToCompStat = link.attr("href").replace("\u00A0", "").replace(" ", "");
      } else if (holder.hasClass("infoPageTitle2")) {
        linkToSportStat = link.attr("href").replace("\u00A0", "");
      }
    }

    logger.debug(stat);
    return stat;
  }
Exemplo n.º 10
0
 /**
  * Counts the position of {@code element} among its siblings that share its tag.
  *
  * @param root not used by this implementation
  * @param element element to locate; its parent is dereferenced without a null check
  * @return the number of same-tag children of the parent up to and including {@code element}
  *     (so the first such child yields 1)
  */
 protected int calculatePosition(Element root, Element element) {
   int position = 0;
   for (Element sibling : element.parent().children()) {
     if (sibling.tag().equals(element.tag())) {
       position++;
     }
     // identity comparison: stop at the element itself, not at an equal-looking sibling
     if (sibling == element) {
       break;
     }
   }
   return position;
 }
Exemplo n.º 11
0
 /**
  * Builds the chain of location nodes for the first element in {@code locData} whose {@code
  * Name} attribute matches {@code locStr}, ordered from the outermost ancestor to the match.
  *
  * <p>The climb stops at the first ancestor for which {@code LocNode.parseNode} returns {@code
  * null}. The parse result of the matched element itself is added without a null check.
  *
  * @param locStr value of the {@code Name} attribute, spliced into the selector as-is
  * @return the node chain; empty when no element matches
  */
 private List<LocNode> getNodeList(String locStr) {
   LinkedList<LocNode> chain = new LinkedList<LocNode>();
   // Find the node matching locStr
   Element match = locData.select("[Name=" + locStr + "]").first();
   if (match == null) {
     return chain;
   }

   chain.addFirst(LocNode.parseNode(match));
   for (Element ancestor = match.parent(); ancestor != null; ancestor = ancestor.parent()) {
     LocNode parsed = LocNode.parseNode(ancestor);
     if (parsed == null) {
       break;
     }
     chain.addFirst(parsed);
   }
   return chain;
 }
Exemplo n.º 12
0
  /**
   * Inserts an {@code <img>} tag for every {@code input[name="hiddenimg"]} in the given HTML.
   *
   * <p>The image source is the input's {@code value} attribute, and the tag is inserted before
   * the input's parent element. The value is concatenated into the markup without escaping.
   *
   * @param pcont HTML to rewrite, may be {@code null}
   * @return the HTML of the rewritten document, or an empty string when {@code pcont} is {@code
   *     null}
   */
  public String reviseImgForYuehui(String pcont) {
    if (pcont == null) {
      return "";
    }

    Document doc = Jsoup.parse(pcont);
    for (Element hiddenInput : doc.select("input[name=\"hiddenimg\"]")) {
      String source = hiddenInput.attributes().get("value");
      hiddenInput.parent().before("<img src=\"" + source + "\" />");
    }
    return doc.html();
  }
Exemplo n.º 13
0
  /**
   * Downloads the post page at {@code url} and fills in {@code title}, {@code excerpt}, {@code
   * content} and {@code date} from it.
   *
   * <p>The page must contain an element with id {@code main} holding exactly one element of class
   * {@code content}; otherwise a diagnostic is printed and no field is changed. Inside the
   * content, every {@code lightbox} element is replaced in place by the nodes returned from
   * {@code extractImageNodes}. The direct children are then read up to the first one with class
   * {@code clear}: the first {@code h1} becomes the title (if none is set yet), the first {@code
   * p} becomes the excerpt (if none is set yet), and every child except the title is appended to
   * the content.
   *
   * @param aInConnection connection used to fetch the page; its URL is overwritten with {@code
   *     url}
   * @param images collection handed to {@code collectImages} together with the content element
   * @throws IOException if the page cannot be fetched
   */
  public void download(Connection aInConnection, Collection<Image> images) throws IOException {
    aInConnection.url(url);
    Document lDocument = aInConnection.get();

    Element lMain = lDocument.getElementById("main");
    if (lMain == null) {
      System.out.println("No main section in post page " + toString());
      return;
    }

    Elements lContents = lMain.getElementsByClass("content");
    if (lContents.size() != 1) {
      // Covers zero matches as well as several, so report the actual count.
      System.out.println(
          "Expected exactly one content in main section of post page "
              + toString()
              + " but found "
              + lContents.size());
      return;
    }

    StringBuilder sb = new StringBuilder();
    Element lContent = lContents.first();

    collectImages(lContent, images);

    // Swap each lightbox wrapper for the image nodes extracted from it, at the same position.
    Elements lLightboxElements = lContent.getElementsByClass("lightbox");
    for (Element lLightboxElement : lLightboxElements) {
      Collection<Node> lImageNodes = extractImageNodes(lLightboxElement);

      Element lParent = lLightboxElement.parent();
      int i = lLightboxElement.siblingIndex();
      lParent.insertChildren(i, lImageNodes);
      lLightboxElement.remove();
    }

    for (Element lChildElement : lContent.children()) {
      if (lChildElement.hasClass("clear")) {
        // no more post content
        break;
      }

      if (title == null && lChildElement.tagName().equals("h1")) {
        // the first h1 header is the title
        title = lChildElement.html();
      } else {
        if (excerpt == null && lChildElement.tagName().equals("p")) {
          excerpt = lChildElement.text();
        }
        sb.append(lChildElement.toString());
      }
    }

    content = sb.toString();

    // A post without a date element keeps whatever date was set before.
    Element lDateElement = lContent.getElementsByClass("date").first();
    if (lDateElement != null) {
      date = new PostDate(lDateElement.html());
    }
  }
  /**
   * Appends the text of all descendants of {@code node} matching {@code tagName} to {@code sb},
   * each followed by a blank line.
   *
   * <p>A match is skipped when it, or any ancestor below {@code node}, is flagged by {@code
   * unlikely}. Its text is also skipped when it is empty, shorter than {@code minParagraphText},
   * or longer than twice the value returned by {@code SHelper.countLetters} for that text.
   *
   * @param node root of the subtree to search
   * @param sb buffer receiving the accepted paragraphs
   * @param tagName selector passed to {@code node.select}
   */
  protected void append(Element node, StringBuilder sb, String tagName) {
    // is select more costly then getElementsByTag?
    for (Element candidate : node.select(tagName)) {
      // check all elements from the candidate up to (but excluding) 'node'
      boolean rejected = false;
      for (Element cursor = candidate;
          cursor != null && !cursor.equals(node);
          cursor = cursor.parent()) {
        if (unlikely(cursor)) {
          rejected = true;
          break;
        }
      }
      if (rejected) {
        continue;
      }

      String text = node2Text(candidate);
      boolean tooShort = text.isEmpty() || text.length() < minParagraphText;
      if (tooShort || text.length() > SHelper.countLetters(text) * 2) {
        continue;
      }

      sb.append(text).append("\n\n");
    }
  }
Exemplo n.º 15
0
 /**
  * Prints a description of every element in {@code map} and every link in {@code topMenumap} to
  * standard output, initialising first if {@code isInit} is not set.
  *
  * <p>For {@code ul} and {@code table} elements all contained links are listed. An {@code a}
  * element is printed as a single link. Any other element is printed with its tag and inner
  * HTML, preceded by its parent's tag name when it is a {@code span}.
  *
  * @return the {@code map} field
  */
 public Map doProcess() {
   if (!isInit) {
     init();
   }

   for (String key : map.keySet()) {
     Element val = map.get(key);
     String tag = val.tagName().toLowerCase();
     // If it is a ul or table, render it with the corresponding (list) component
     boolean isList = "ul".equals(tag) || "table".equals(tag);
     if (isList) {
       System.out.println("------------------列表开始-----------------------------");
       for (Element ele : val.select("a")) {
         System.out.println("a:" + ele.attr("abs:href") + ",文本:" + ele.text());
       }
       System.out.println("------------------列表结束-----------------------------");
       continue;
     }

     System.out.println("------------------非列表-----------------------------");
     if ("a".equals(tag)) {
       System.out.println("a:" + val.attr("abs:href") + ",文本:" + val.text());
       continue;
     }
     if ("span".equals(tag)) {
       System.out.println("父容器:" + val.parent().tagName());
     }
     System.out.println("标签:" + val.tagName() + ",html:" + val.html());
   }

   System.out.println("---------------top menu---------------------");
   for (String key : topMenumap.keySet()) {
     Element val = topMenumap.get(key);
     for (Element ele : val.select("a")) {
       System.out.println("a:" + ele.attr("abs:href") + ",文本:" + ele.text());
     }
   }
   return map;
 }
  /**
   * Fetches the Wikipedia page of the given award year and extracts the winners of the main
   * awards.
   *
   * <p>The list is taken from the element that follows the parent of {@code span[id=Awards]}.
   * In each list item the first link names the award and the second link names the movie. Only
   * "Palme d'Or", "Best Actor", "Best Actress" and "Best Director" are kept; items with fewer
   * than two links are ignored.
   *
   * @param awardYear year whose page path and year value are used
   * @return the recognised award winners; empty when the awards section is not found
   * @throws IOException if the page cannot be fetched
   */
  public List<AwardMovie> getList(AwardYear awardYear) throws IOException {
    List<AwardMovie> awards = new ArrayList<AwardMovie>();
    URL url = new URL("http://en.wikipedia.org" + awardYear.getWikipediaSource());
    Document doc = Jsoup.parse(url, 3000);

    // The attribute selector needs its closing ']'.
    Element awardSpan = doc.select("span[id=Awards]").first();
    if (awardSpan == null) {
      return awards;
    }
    Element uiElm = awardSpan.parent().nextElementSibling();
    if (uiElm == null) {
      return awards;
    }

    for (Element liElm : uiElm.select("li")) {
      Elements elms = liElm.select("a");
      // Both the award link (index 0) and the movie link (index 1) are required.
      if (elms.size() < 2) {
        continue;
      }
      Element aElm = elms.get(0);
      Element movieElm = elms.get(1);
      String awardName = aElm.text();
      logger.debug("aElm=" + awardName);

      AwardMovie movie =
          new AwardMovie(movieElm.attr("href"), movieElm.text(), awardYear.getYear());
      if (awardName.equals("Palme d'Or")) {
        movie.setBestMovie(true);
      } else if (awardName.equals("Best Actor")) {
        movie.setBestLeadHero(true);
      } else if (awardName.equals("Best Actress")) {
        movie.setBestLeadHeroine(true);
      } else if (awardName.equals("Best Director")) {
        movie.setBestDirector(true);
      } else {
        continue; // not one of the tracked awards
      }
      logger.debug(movie);
      awards.add(movie);
    }
    return awards;
  }
Exemplo n.º 17
0
  /**
   * Rewrites the images of the given HTML.
   *
   * <p>First, for every image inside a {@code noscript} element an {@code <img>} tag with the
   * same {@code src} is inserted before that image's parent, and the {@code noscript} element is
   * removed. Then, for every image in the document, {@code src} is replaced by {@code
   * data-original} when that is non-empty, and afterwards by {@code data-actualsrc} when that is
   * non-empty, so the latter takes precedence.
   *
   * @param pcont HTML to rewrite, may be {@code null}
   * @return the HTML of the rewritten document, or an empty string when {@code pcont} is {@code
   *     null}
   */
  public String reviseImgForZhiHuApp(String pcont) {
    if (pcont == null) {
      return "";
    }

    Document doc = Jsoup.parse(pcont);

    for (Element fallback : doc.select("noscript")) {
      for (Element inner : fallback.getElementsByTag("img")) {
        String source = inner.attributes().get("src");
        inner.parent().before("<img src=\"" + source + "\" />");
      }
      fallback.remove();
    }

    for (Element img : doc.select("img")) {
      // Read both candidates before writing, since the second write overrides the first.
      String lazySrc = img.attr("data-original");
      String actualSrc = img.attr("data-actualsrc");
      if (!lazySrc.isEmpty()) {
        img.attr("src", lazySrc);
      }
      if (!actualSrc.isEmpty()) {
        img.attr("src", actualSrc);
      }
    }
    return doc.html();
  }
Exemplo n.º 18
0
  /**
   * Converts every {@code div.MiniEntry} of the document into a {@link ViewModel}.
   *
   * <p>All values are read from the parent of the {@code MiniEntry} element: slug (from the
   * {@code href} found on the ancestors of the {@code h1}), title, summary, language id (the
   * file name of the flag image without extension), genre (text after the colon, cut at the
   * first comma) and rating (text between the colon and the character before the slash). An
   * entry that fails to parse is logged and left out.
   *
   * @param doc document to parse
   * @return the successfully parsed entries in document order
   */
  private List<ViewModel> parseList(Document doc) {
    List<ViewModel> list = new ArrayList<ViewModel>();

    for (Element miniEntry : doc.select("div.MiniEntry")) {
      Element entry = miniEntry.parent();
      try {
        ViewModel model = new ViewModel();

        Elements heading = entry.select("h1");
        String url = heading.parents().attr("href");
        int slugStart = url.lastIndexOf("/") + 1;
        model.setSlug(url.substring(slugStart, url.lastIndexOf(".")));
        model.setTitle(heading.text());
        model.setSummary(entry.select("div.Descriptor").text());

        // First left-floated cell: flag image whose file name is the numeric language id.
        Elements leftCells = entry.select("div.Genre > div.floatleft");
        String flag = leftCells.eq(0).select("img").attr("src");
        flag = flag.substring(flag.lastIndexOf("/") + 1);
        flag = flag.substring(0, flag.indexOf("."));
        int lnId = Integer.parseInt(flag);
        model.setLanguageResId(languageResMap.get(lnId));
        String language = languageKeyMap.get(lnId);

        // Second left-floated cell: "<label>: genre[, more genres]" - keep the first genre only.
        String genre = leftCells.eq(1).text();
        genre = genre.substring(genre.indexOf(":") + 1).trim();
        int comma = genre.indexOf(",");
        if (comma >= 0) {
          genre = genre.substring(0, comma);
        }
        model.setGenre(genre);

        String rating = entry.select("div.Genre > div.floatright").text();
        rating = rating.substring(rating.indexOf(":") + 1, rating.indexOf("/") - 1);
        model.setRating(Float.valueOf(rating.trim()));

        model.setImage(getPageLink(model) + "#language=" + language);

        list.add(model);
      } catch (Exception e) {
        Log.e("Kinox", "Error parsing " + entry.html(), e);
      }
    }
    return list;
  }
Exemplo n.º 19
0
  /**
   * Parses one comment record (an HTML fragment) and hands the resulting comment to the {@code
   * ServerWorker}.
   *
   * <p>The first {@code div} of the fragment is the comment root: its class list yields the
   * "new" flag and the nesting level, its id is the comment id. The {@code c_body} element is
   * the comment body; records without a root or body, and records whose body is hidden, are
   * ignored. Images are wrapped in a link to their source (unless already inside an {@code a})
   * and restyled when images are enabled, and removed otherwise. Author, signature and, for
   * non-inbox posts, the vote state are filled in when present. Finally the comment counter is
   * advanced and listeners are notified at the 50th comment (or 50 comments past the comment
   * to select) and then every 100 comments.
   *
   * @param record HTML fragment of a single comment
   * @throws ExecutionException declared for the calls made while storing and notifying
   * @throws InterruptedException declared for the calls made while storing and notifying
   */
  private void parseRecord(String record) throws ExecutionException, InterruptedException {
    Element root = Jsoup.parse(record).getElementsByTag("div").first();
    if (root == null) {
      // No div at all: the fragment cannot be a comment.
      return;
    }

    Comment comment = new Comment();

    if (root.className().contains("new")) comment.setNew(true);

    String commentId = root.id();
    comment.setLepraId(commentId);

    if (commentId.equals(commentToSelectId)) commentToSelect = commentsCount;

    Matcher level = patternLevel.matcher(root.className());
    if (level.find()) comment.setLevel(Short.valueOf(level.group(1)));

    Element element = root.getElementsByClass("c_body").first();
    if (element == null || element.className().contains("hidden")) return;

    boolean containsImages = false;
    Elements images = element.getElementsByTag("img");
    for (Element image : images) {
      String src = image.attr("src");
      if (isImagesEnabled && !TextUtils.isEmpty(src)) {
        if (!image.parent().tag().getName().equalsIgnoreCase("a"))
          image.wrap("<a href=" + "\"" + src + "\"></a>");

        // Drop fixed dimensions so the image scales to the available width.
        image.removeAttr("width");
        image.removeAttr("height");
        image.removeAttr("style");

        image.attr("style", "max-width:100%");

        containsImages = true;
      } else image.remove();
    }

    String html = Utils.wrapLepraTags(element);
    comment.setHtml(html);
    comment.setOnlyText(!containsImages && !html.contains("leprosorium.ru"));

    Element authorElement = root.getElementsByClass("ddi").first();
    Elements a = authorElement != null ? authorElement.getElementsByTag("a") : null;
    if (a != null && !a.isEmpty()) {
      String url = Commons.PREFIX_URL + a.first().attr("href");
      url = url.replace("\n", "");
      comment.setUrl(url);

      String author = a.size() > 1 ? a.get(1).text() : a.get(0).text();
      if (postAuthor.equals(author)) comment.setPostAuthor(true);

      String color = "black";
      if (comment.isPostAuthor()) color = "red";
      else if (author.equals(userName)) color = "#3270FF";

      comment.setAuthor(author);

      // Take the text before the author name literally. String.split would treat the name as a
      // regular expression and return an empty array when the text is just the name.
      String authorLine = authorElement.text();
      int authorPos = authorLine.indexOf(author);
      String prefix = authorPos >= 0 ? authorLine.substring(0, authorPos) : authorLine;

      String signature = prefix + "<b><font color=\"" + color + "\">" + author + "</font></b>";

      String epochDate =
          authorElement.getElementsByClass("js-date").first().attr("data-epoch_date");
      // data-epoch_date is multiplied by 1000 to get the milliseconds Date expects.
      Date date = new Date(Long.valueOf(epochDate) * 1000);

      signature = signature + " " + date.toLocaleString();

      comment.setSignature(signature);
    }

    if (!post.isInbox()) {
      Element vote = root.getElementsByClass("vote").first();
      if (vote != null) {
        if (!vote.select(".vote_button.vote_button_plus.vote_voted").isEmpty())
          comment.setPlusVoted(true);
        else if (!vote.select(".vote_button.vote_button_minus.vote_voted").isEmpty())
          comment.setMinusVoted(true);

        Element rating = vote.getElementsByClass("vote_result").first();
        comment.setRating(Short.valueOf(rating.text()));
      }
    }

    comment.setNum(commentsCount);

    ServerWorker.Instance().addNewComment(post.getId(), comment);

    commentsCount++;

    if (commentToSelectId != null) {
      // Wait until 50 comments past the one to select are loaded before the first notification.
      if (commentToSelect != -1 && commentsCount >= 50 + commentToSelect) {
        notifyAboutFirstCommentsUpdate();

        commentToSelectId = null;
        commentToSelect = -1;
      }
    } else {
      if (commentsCount == 50) {
        notifyAboutFirstCommentsUpdate();
      } else if (commentsCount != 0 && commentsCount % 100 == 0) {
        notifyAboutCommentsUpdate();
      }
    }
  }
Exemplo n.º 20
0
  /**
   * Reads the available search fields from the catalogue's advanced search page.
   *
   * <p>Text fields come from the options of {@code select#search_type0_0}. When that yields
   * nothing, they are read from {@code searchFields["key"] = "label";} assignments found in the
   * page's scripts. Every {@code select} named {@code filter[]} additionally becomes a dropdown
   * field with an empty first entry; its "meaning" is the part before the colon of the last
   * option value containing one, or the select's id when no option value has a colon.
   *
   * @return the text fields followed by the dropdown fields
   * @throws IOException if the page cannot be fetched
   * @throws OpacErrorException declared for the calls made while starting and fetching
   * @throws JSONException if a field's data object cannot be filled
   */
  @Override
  public List<SearchField> parseSearchFields()
      throws IOException, OpacErrorException, JSONException {
    start();
    // No spaces around '=': they would end up in the parameter name and value of the request.
    String html =
        httpGet(opac_url + "/Search/Advanced?mylang=" + languageCode, getDefaultEncoding());
    Document doc = Jsoup.parse(html);

    List<SearchField> fields = new ArrayList<>();

    for (Element option : doc.select("select#search_type0_0 option")) {
      TextSearchField field = new TextSearchField();
      field.setDisplayName(option.text());
      field.setId(option.val());
      field.setHint("");
      field.setData(new JSONObject());
      field.getData().put("meaning", option.val());
      fields.add(field);
    }

    if (fields.isEmpty()) {
      // Weird JavaScript, e.g. view-source:http://vopac.nlg.gr/Search/Advanced
      Pattern pattern_key = Pattern.compile("searchFields\\[\"([^\"]+)\"\\] = \"([^\"]+)\";");
      for (Element script : doc.select("script")) {
        String scriptSource = script.html();
        if (!scriptSource.contains("searchFields")) continue;
        for (String line : scriptSource.split("\n")) {
          Matcher matcher = pattern_key.matcher(line);
          if (matcher.find()) {
            TextSearchField field = new TextSearchField();
            field.setDisplayName(matcher.group(2));
            field.setId(matcher.group(1));
            field.setHint("");
            field.setData(new JSONObject());
            field.getData().put("meaning", field.getId());
            fields.add(field);
          }
        }
      }
    }

    for (Element select : doc.select("select")) {
      if (!select.attr("name").equals("filter[]")) continue;
      DropdownSearchField field = new DropdownSearchField();
      // The display name is the first label found next to the select, if any.
      Elements labels = select.parent().select("label");
      if (!labels.isEmpty()) {
        field.setDisplayName(labels.first().text());
      }
      field.setId(select.attr("name") + select.attr("id"));
      String meaning = select.attr("id");
      field.addDropdownValue("", "");
      for (Element option : select.select("option")) {
        if (option.val().contains(":")) {
          meaning = option.val().split(":")[0];
        }
        field.addDropdownValue(option.val(), option.text());
      }
      field.setData(new JSONObject());
      field.getData().put("meaning", meaning);
      fields.add(field);
    }

    return fields;
  }
Exemplo n.º 21
0
  /**
   * Converts a search result page into a {@link SearchRequestResult}.
   *
   * <p>An error message on the page, or a page with no results but a paragraph in {@code .main},
   * is reported as an {@link OpacErrorException}. The total count is the third {@code strong}
   * inside {@code .resulthead} when exactly one such header exists, otherwise -1. For every
   * {@code div.result} the description is built from the {@code span.Z3988} metadata (looked up
   * in the row, or in its parent when that is an {@code li}), falling back to the title link
   * text. Status, media type, cover and id are derived from the row's classes, the configured
   * media type selectors, its images and the title link's URL path.
   *
   * @param doc result page; its base URI is set to the catalogue's result URL
   * @param page page number stored in every result and in the returned object
   * @return the parsed results together with the total count and page number
   * @throws OpacErrorException if the page shows an error instead of results
   */
  protected SearchRequestResult parse_search(Document doc, int page) throws OpacErrorException {
    doc.setBaseUri(opac_url + "/Search/Results");

    if (doc.select("p.error, p.errorMsg, .alert-error").size() > 0) {
      throw new OpacErrorException(doc.select("p.error, p.errorMsg, .alert-error").text());
    } else if (doc.select("div.result").size() == 0 && doc.select(".main p").size() > 0) {
      throw new OpacErrorException(doc.select(".main p").first().text());
    }

    int rescount = -1;
    if (doc.select(".resulthead").size() == 1) {
      rescount = Integer.parseInt(doc.select(".resulthead strong").get(2).text());
    }
    List<SearchResult> reslist = new ArrayList<>();

    for (Element row : doc.select("div.result")) {
      SearchResult res = new SearchResult();

      // Fetch the metadata span with the same selector that was used to detect it.
      Element z3988el = null;
      if (row.select("span.Z3988").size() == 1) {
        z3988el = row.select("span.Z3988").first();
      } else if (row.parent().tagName().equals("li")
          && row.parent().select("span.Z3988").size() > 0) {
        z3988el = row.parent().select("span.Z3988").first();
      }

      if (z3988el != null) {
        List<NameValuePair> z3988data;
        try {
          StringBuilder description = new StringBuilder();
          // The title attribute holds a URL-encoded query string; a dummy host makes it a URI.
          z3988data =
              URLEncodedUtils.parse(
                  new URI("http://dummy/?" + z3988el.select("span.Z3988").attr("title")), "UTF-8");
          for (NameValuePair nv : z3988data) {
            String value = nv.getValue();
            if (value == null || value.trim().equals("")) {
              continue;
            }
            String name = nv.getName();
            if (name.equals("rft.btitle") || name.equals("rft.atitle")) {
              description.append("<b>").append(value).append("</b>");
            } else if (name.equals("rft.au") || name.equals("rft.date")) {
              description.append("<br />").append(value);
            }
          }
          res.setInnerhtml(description.toString());
        } catch (URISyntaxException e) {
          e.printStackTrace();
        }
      } else {
        res.setInnerhtml(row.select("a.title").text());
      }

      if (row.hasClass("available") || row.hasClass("internet")) {
        res.setStatus(SearchResult.Status.GREEN);
      } else if (row.hasClass("reservable")) {
        res.setStatus(SearchResult.Status.YELLOW);
      } else if (row.hasClass("not-available")) {
        res.setStatus(SearchResult.Status.RED);
      } else if (row.select(".status.available").size() > 0) {
        res.setStatus(SearchResult.Status.GREEN);
      } else if (row.select(".status .label-success").size() > 0) {
        res.setStatus(SearchResult.Status.GREEN);
      } else if (row.select(".status .label-important").size() > 0) {
        res.setStatus(SearchResult.Status.RED);
      } else if (row.select(".status.checkedout").size() > 0) {
        res.setStatus(SearchResult.Status.RED);
      }

      // The first matching selector decides the media type.
      for (Map.Entry<String, SearchResult.MediaType> entry : mediaTypeSelectors.entrySet()) {
        if (row.select(entry.getKey()).size() > 0) {
          res.setType(entry.getValue());
          break;
        }
      }

      // Only the first image whose URL contains "over" is considered.
      for (Element img : row.select("img")) {
        String src = img.absUrl("src");
        if (src.contains("over")) {
          if (!src.contains("Unavailable")) {
            res.setCover(src);
          }
          break;
        }
      }

      res.setPage(page);
      // A row without a title link is still returned, just without an id.
      Element titleLink = row.select("a.title").first();
      if (titleLink != null) {
        try {
          URL idurl = new URL(titleLink.absUrl("href"));
          Matcher matcher = idPattern.matcher(idurl.getPath());
          if (matcher.find()) {
            res.setId(matcher.group(1));
          }
        } catch (MalformedURLException e) {
          e.printStackTrace();
        }
      }
      reslist.add(res);
    }

    return new SearchRequestResult(reslist, rescount, page);
  }
Exemplo n.º 22
0
  /**
   * Handles the downloaded event page.
   *
   * <p>If the page shows the "not logged in" marker, the login activity is started with the
   * {@code lostSession} flag. If {@code PREPCall} is set, the page is only an intermediate one:
   * the link inside {@code div.detailout} is requested and {@code PREPCall} is cleared.
   * Otherwise the event details are parsed: the title, the property paragraphs of the course
   * table, the rows of the table captioned "Termine" and the rows of the table whose caption
   * contains "Material". The three adapters are filled from these.
   *
   * @param result answer holding the HTML of the page
   */
  public void onPostExecute(AnswerObject result) {
    Document doc = Jsoup.parse(result.getHTML());
    sendHTMLatBug(doc.html());

    if (doc.select("span.notLoggedText").text().length() > 0) {
      Intent BackToLoginIntent = new Intent(this, TuCanMobileActivity.class);
      BackToLoginIntent.putExtra("lostSession", true);
      startActivity(BackToLoginIntent);
      return;
    }

    if (PREPCall) {
      String nextlink =
          TucanMobile.TUCAN_PROT
              + TucanMobile.TUCAN_HOST
              + doc.select("div.detailout").select("a").attr("href");
      SimpleSecureBrowser callOverviewBrowser = new SimpleSecureBrowser(this);
      RequestObject thisRequest =
          new RequestObject(nextlink, localCookieManager, RequestObject.METHOD_GET, "");
      PREPCall = false;
      callOverviewBrowser.execute(thisRequest);
      return;
    }

    String title = doc.select("h1").text();
    TextView SingleEventTitle = (TextView) findViewById(R.id.singleevent_title);
    SingleEventTitle.setText(title);

    // The properties sit in the first cell of the second row, or of the only row.
    Elements courseRows = doc.select("table[courseid]").first().select("tr");
    Element propertyCell;
    if (courseRows.size() == 1) {
      propertyCell = courseRows.get(0).select("td").first();
    } else {
      propertyCell = courseRows.get(1).select("td").first();
    }

    ArrayList<String> titles = new ArrayList<String>();
    ArrayList<String> values = new ArrayList<String>();
    for (Element paragraph : propertyCell.select("p")) {
      String[] information = crop(paragraph.html());
      titles.add(information[0]);
      values.add(information[1]);
    }

    PropertyValueAdapter = new SingleEventAdapter(titles, values);
    setListAdapter(PropertyValueAdapter);

    // Locate the appointment table and the material table by their captions.
    Elements dateRows = null;
    Elements materialRows = null;
    for (Element caption : doc.select("caption")) {
      if (caption.text().equals("Termine")) {
        System.out.println(caption.parent().html());
        dateRows = caption.parent().select("tr");
      } else if (caption.text().contains("Material")) {
        materialRows = caption.parent().select("tr");
      }
    }

    ArrayList<String> eventNumber = new ArrayList<String>();
    ArrayList<String> eventDate = new ArrayList<String>();
    ArrayList<String> eventTime = new ArrayList<String>();
    ArrayList<String> eventRoom = new ArrayList<String>();
    ArrayList<String> eventInstructor = new ArrayList<String>();

    // A page without a "Termine" table simply has no appointments.
    if (dateRows != null) {
      for (Element row : dateRows) {
        Elements cols = row.select("td");
        if (cols.size() < 6) {
          continue; // not a complete appointment row
        }
        eventNumber.add(cols.get(0).text());
        eventDate.add(cols.get(1).text());
        eventTime.add(cols.get(2).text() + "-" + cols.get(3).text());
        eventRoom.add(cols.get(4).text());
        eventInstructor.add(cols.get(5).text());
      }
    }

    DateAppointmentAdapter =
        new AppointmentAdapter(eventDate, eventTime, eventNumber, eventRoom, eventInstructor);

    int ct = 0;
    ArrayList<String> materialNumber = new ArrayList<String>();
    ArrayList<String> materialName = new ArrayList<String>();
    ArrayList<String> materialDesc = new ArrayList<String>();
    materialLink = new ArrayList<String>();
    ArrayList<String> materialFile = new ArrayList<String>();
    if (materialRows != null) {
      for (Element row : materialRows) {
        Elements cells = row.select("td");
        if (cells.size() <= 1) {
          continue;
        }
        // Each material spans up to three data rows: number/name, description, file link.
        ct++;
        System.out.println(ct + "  " + (ct % 3));
        switch (ct % 3) {
          case 1:
            materialNumber.add(cells.get(0).text());
            materialName.add(cells.get(1).text());
            break;
          case 2:
            materialDesc.add(cells.get(1).text());
            if (row.attr("class").equals("tbdata_nob")) {
              // No file row follows: fill in blanks and skip the third slot.
              ct++;
              materialLink.add("");
              materialFile.add("");
            }
            break;
          case 0:
            materialLink.add(cells.get(1).select("a").attr("href"));
            materialFile.add(cells.get(1).select("a").text());
            break;
          default:
            break;
        }
      }
    }

    if (ct > 2) {
      FileAdapter =
          new AppointmentAdapter(materialNumber, materialFile, null, materialName, materialDesc);
      thereAreFiles = true;
    } else {
      FileAdapter =
          new ArrayAdapter<String>(
              this, android.R.layout.simple_list_item_1, new String[] {"Kein Material"});
    }
  }
Exemplo n.º 23
0
 /**
  * Called when leaving {@code source}: moves {@code destination} back up to its parent if
  * {@code source} is an element whose tag the whitelist considers safe.
  *
  * @param source node being left
  * @param depth not used by this implementation
  */
 public void tail(Node source, int depth) {
   boolean descended = source instanceof Element && whitelist.isSafeTag(source.nodeName());
   if (!descended) {
     return;
   }
   // would have descended, so pop destination stack
   destination = destination.parent();
 }
  /**
   * Pairs the right-hand list components of the parsed page ({@code div.n13-pilot}) with the
   * {@code list*} child nodes of {@code trainingAndEventsRightNode} and hands each pair to
   * {@code setListElements}. Findings (a follow-us block on the page, unexpected images, a
   * mismatch between the number of page lists and repository nodes, missing nodes or missing
   * page lists) are appended to {@code sb}.
   *
   * <p>Pairing rules, based on the number of page lists whose parent has class {@code gd-right}
   * ({@code count}) versus the size reported by the node iterator ({@code nodeSize}):
   *
   * <ul>
   *   <li>{@code nodeSize >= count}: every page list is paired with the next node;
   *   <li>{@code nodeSize < count}: only page lists that contain a {@code ul} consume a node;
   *   <li>{@code nodeSize != count}: a mismatch message is appended after pairing.
   * </ul>
   *
   * Pairing stops as soon as the node iterator is exhausted.
   *
   * @param doc parsed web page to read the list components from
   * @param trainingAndEventsRightNode repository node whose {@code list*} children are paired
   * @param session passed through to {@code setListElements}
   * @param locale passed through to {@code setListElements}
   * @param urlMap passed through to {@code setListElements}
   * @throws RepositoryException if reading the child nodes fails
   */
  private void migrateRightList(
      Document doc,
      Node trainingAndEventsRightNode,
      Session session,
      String locale,
      Map<String, String> urlMap)
      throws RepositoryException {
    Elements listElements = doc.select("div.n13-pilot");

    // Check for the follow us. Selecting on an empty result set yields an empty result set, so
    // the outer selector only needs to be evaluated once.
    Elements followUs = doc.select("div.fw-cisco-assistant").select("div.s14-pilot");
    if (!followUs.isEmpty()) {
      sb.append(Constants.FOLLOWUS_NODE_NOT_FOUND);
    } else {
      log.debug("Follow us does not exists");
    }
    // end of check for follow us

    // Check for image
    Element listEle = listElements.first();
    if (listEle != null) {
      Elements imgElements = listEle.getElementsByTag("img");
      if (!imgElements.isEmpty()) {
        sb.append("<li>" + imgElements.size() + " extra images found in the right List</li>");
      }
      Element sibling = listEle.nextElementSibling();
      if (sibling != null && !sibling.getElementsByTag("img").isEmpty()) {
        sb.append(Constants.EXTRA_IMG_FOUND_IN_RIGHT_PANEL);
      }
    }
    // end of check for image

    if (listElements.isEmpty()) {
      sb.append("<li>List component not found in web url</li>");
      return;
    }

    // Only lists sitting directly inside the right column are counted for the comparison.
    int count = 0;
    for (Element listElement : listElements) {
      if (listElement.parent().hasClass("gd-right")) {
        count++;
      }
    }

    NodeIterator listNodeIterator =
        trainingAndEventsRightNode.hasNodes()
            ? trainingAndEventsRightNode.getNodes("list*")
            : null;
    if (listNodeIterator == null) {
      sb.append(Constants.LIST_NODE_NOT_FOUND);
      return;
    }

    int nodeSize = (int) listNodeIterator.getSize();
    log.debug("node Size" + nodeSize + "ele Size" + count);

    // With fewer nodes than counted lists, only lists that actually hold a <ul> consume a node.
    boolean requireUl = nodeSize < count;
    for (Element ele : listElements) {
      // listElements may hold more entries than there are nodes (count ignores lists outside
      // .gd-right), so never advance the iterator past its end.
      if (!listNodeIterator.hasNext()) {
        break;
      }
      if (requireUl && ele.getElementsByTag("ul").isEmpty()) {
        continue;
      }
      Node listNode = listNodeIterator.nextNode();
      setListElements(ele, listNode, session, locale, urlMap);
    }

    if (count != nodeSize) {
      sb.append(
          Constants.MISMATCH_IN_LIST_NODES + count + Constants.LIST_NODES_COUNT + nodeSize);
    }
  }
Exemplo n.º 25
0
  /**
   * Fetches the page at {@code url} and flattens selected rows of its third {@code tbody} into a
   * single string.
   *
   * <p>Every {@code nobr} element inside that {@code tbody} is inspected in document order. When
   * its own text is one of the labels below, the {@code div} elements of the element following
   * the {@code nobr}'s parent supply the value:
   *
   * <ul>
   *   <li>{@code Name}: own text of the second div, appended without a separator;
   *   <li>{@code Formula}, {@code Exact mass}, {@code Mol weight}: a tab followed by the own
   *       text of the first div;
   *   <li>{@code Other DBs}: a tab, then for every div its own text; for every div but the
   *       first, additionally the own text of each contained link followed by {@code ","}, and
   *       one more {@code ","} closing that div.
   * </ul>
   *
   * Any exception raised while fetching or parsing is printed to standard error and whatever
   * was collected up to that point is returned.
   *
   * @param url address of the page to download
   * @return the collected text; the empty string when the page has fewer than three
   *     {@code tbody} elements; {@code null} when a {@code strong} element contains
   *     "No such data."
   */
  public String getContentPr(String url) {
    StringBuilder result = new StringBuilder();
    try {
      Document doc = Jsoup.connect(url).timeout(100000).get();

      // No such data.
      for (Element strong : doc.getElementsByTag("strong")) {
        if (strong.ownText().contains("No such data.")) {
          return null;
        }
      }

      Elements tbodys = doc.getElementsByTag("tbody");
      if (tbodys.size() <= 2) {
        return result.toString();
      }
      Element tbody = tbodys.get(2);

      for (Element nobr : tbody.getElementsByTag("nobr")) {
        String label = nobr.ownText();

        if (label.equals("Name")) {
          // Get the name
          Elements valueDivs = followingRowDivs(nobr);
          for (String name : valueDivs.get(1).ownText().split("<br>")) {
            result.append(name);
          }
        } else if (label.equals("Formula")
            || label.equals("Exact mass")
            || label.equals("Mol weight")) {
          // Get Formula / Exact mass / Mol weight. The tab is written before the lookup so a
          // failed lookup still leaves the separator in the partial result.
          result.append("\t");
          result.append(followingRowDivs(nobr).get(0).ownText().split("<br>")[0]);
        } else if (label.equals("Other DBs")) {
          // Other DBs
          result.append("\t");
          Elements valueDivs = followingRowDivs(nobr);
          for (int i = 0; i < valueDivs.size(); i++) {
            Element div = valueDivs.get(i);
            result.append(div.ownText());
            if (i > 0) {
              for (Element a : div.getElementsByTag("a")) {
                result.append(a.ownText()).append(",");
              }
              // Force a separator after this div's entries (the CAS number has no <a> tags)
              result.append(",");
            }
          }
        }
      }
    } catch (Exception e1) {
      e1.printStackTrace();
    }
    return result.toString();
  }

  /** Returns the div elements inside the element that follows the parent of {@code nobr}. */
  private static Elements followingRowDivs(Element nobr) {
    Element valueRow = nobr.parent().nextElementSibling();
    return valueRow.getElementsByTag("div");
  }