@Test
  public void equalsWithUnequalElementReturnsFalse() {
    // Elements built from different markup (<h1/> vs <h2/>) must not compare equal.
    TypedElement headingOne = new TypedElement(Jsoup.parse("<h1/>"));
    TypedElement headingTwo = new TypedElement(Jsoup.parse("<h2/>"));

    boolean sameElement = headingOne.equals(headingTwo);

    assertThat(sameElement, is(false));
  }
Beispiel #2
0
 /**
  * Splits {@code startstring} at the literal {@code </b>} and returns the plain text of the first
  * two pieces.
  *
  * <p>Each piece is parsed as HTML and reduced to its text. The first piece is additionally
  * trimmed. When there is no second piece (no {@code </b>} in the input, or nothing after it) the
  * second entry is the empty string instead of the call failing with an index error.
  *
  * @param startstring HTML fragment to split; must not be {@code null}
  * @return a two-element array: text before the first {@code </b>}, then text of the piece after
  *     it
  */
 private static String[] crop(String startstring) {
   if (startstring.isEmpty()) {
     return new String[] {"", ""};
   }
   // A negative limit keeps trailing empty pieces, so the array always has at least one entry
   // and input such as "x</b>" yields {"x", ""} rather than a one-element array.
   String[] pieces = startstring.split("</b>", -1);
   String head = Jsoup.parse(pieces[0]).text().trim();
   String tail = pieces.length > 1 ? Jsoup.parse(pieces[1]).text() : "";
   return new String[] {head, tail};
 }
Beispiel #3
0
  @Test
  public void testBrHasSpace() {
    // A <br> directly between two words shows up as one space in the extracted text.
    String expected = "Hello there";
    Document tight = Jsoup.parse("<p>Hello<br>there</p>");
    assertEquals(expected, tight.text());
    assertEquals(expected, tight.select("p").first().ownText());

    // Spaces already surrounding the <br> still produce a single space.
    Document spaced = Jsoup.parse("<p>Hello <br> there</p>");
    assertEquals(expected, spaced.text());
  }
Beispiel #4
0
  @Test
  public void testById() {
    // Both elements sharing id "foo" are selected, in the order they appear in the markup.
    String duplicateIds = "<div><p id=foo>Hello</p><p id=foo>Foo two!</p></div>";
    Elements matches = Jsoup.parse(duplicateIds).select("#foo");
    assertEquals(2, matches.size());
    assertEquals("Hello", matches.get(0).text());
    assertEquals("Foo two!", matches.get(1).text());

    // An id that is absent from the document selects nothing.
    Elements missing = Jsoup.parse("<div id=1></div>").select("#foo");
    assertEquals(0, missing.size());
  }
  /**
   * Asserts that writing {@code expected} yields the same HTML as {@code design}.
   *
   * <p>Both the written output and {@code design} are parsed with jsoup; the first child of each
   * body is passed through {@code elementToHtml} and the two results are compared.
   *
   * @param design the reference markup
   * @param expected the object handed to {@code write}
   * @param writeData forwarded unchanged to {@code write}
   */
  public void testWrite(String design, T expected, boolean writeData) {
    String written = write(expected, writeData);

    Element actualRoot = Jsoup.parse(written).body().child(0);
    Element expectedRoot = Jsoup.parse(design).body().child(0);

    String actualHtml = elementToHtml(actualRoot);
    Assert.assertEquals(elementToHtml(expectedRoot), actualHtml);
  }
  /**
   * Downloads the page at {@code urlPath} and copies its table cells into {@code foodDayList}.
   *
   * <p>Rows 2 to 6 of the page's {@code tr} elements are read. Rows 2, 3 and 5 supply the date
   * and the two meal descriptions from cells 1 to 5; rows 4 and 6 supply the two prices, each
   * built from four consecutive cells per day. Entries 0 to 4 of {@code foodDayList} are updated.
   *
   * <p>Any failure is logged and ends the import; entries already written stay as they are.
   *
   * @param urlPath address of the page; also used as the base URI for parsing
   */
  public void parsingHTML(String urlPath) {
    try {
      URL url = new URL(urlPath);
      // The page encoding depends on the server_down flag.
      String charset = server_down ? "UTF-8" : "ISO-8859-1";
      Document doc;
      // Close the network stream even when parsing fails.
      try (java.io.InputStream in = url.openStream()) {
        doc = Jsoup.parse(in, charset, urlPath);
      }
      Elements trs = doc.select("tr");

      for (int a = 2; a <= 6; a++) {
        Elements tds = trs.get(a).select("td");
        for (int i = 1; i <= 5; i++) {
          switch (a) {
            case 2:
              // Date
              foodDayList.get(i - 1).setDate(tds.get(i).select("td").text());
              break;
            case 3:
              // Meal 1 description
              foodDayList.get(i - 1).setEssen1Desc(tds.get(i).select("td").text());
              break;
            case 4:
              // TODO: meal 1 price
              foodDayList.get(i - 1).setEssen1Price(joinPriceCells(tds, (i - 1) * 4));
              break;
            case 5:
              // Meal 2 description
              foodDayList.get(i - 1).setEssen2Desc(tds.get(i).select("td").text());
              break;
            case 6:
              // TODO: meal 2 price
              foodDayList.get(i - 1).setEssen2Price(joinPriceCells(tds, (i - 1) * 4));
              break;
          }
        }
      }
    } catch (Exception e) {
      // Pass the throwable itself: e.getMessage() may be null, which Log.e rejects as a message.
      Log.e(TAG, "Failed to parse " + urlPath, e);
    }
  }

  /** Concatenates the text of the four consecutive cells starting at index {@code first}. */
  private String joinPriceCells(Elements cells, int first) {
    StringBuilder price = new StringBuilder();
    for (int p = 0; p < 4; p++) {
      price.append(cells.get(first + p).text());
    }
    return price.toString();
  }
Beispiel #7
0
  @Test
  public void parsesUnterminatedTag() {
    // A lone, unterminated "<p" still produces one p element.
    Document single = Jsoup.parse("<p");
    assertEquals(1, single.getElementsByTag("p").size());

    // Two unterminated tags: the element with id 1 has one child and the element with id 2 exists.
    Document nested = Jsoup.parse("<div id=1<p id='2'");
    Element first = nested.getElementById("1");
    assertEquals(1, first.children().size());
    Element second = nested.getElementById("2");
    assertNotNull(second);
  }
Beispiel #8
0
  /**
   * Runs {@code SQL_SELECTTWEETOUT} for {@code user} and maps every row to a {@link TweetOut}.
   *
   * <p>Each tweet body is reduced to plain text with jsoup. If the original body contains an
   * {@code @name} mention and a user with that name is found, the first mention found is replaced
   * (everywhere it occurs in the text) by a link to that user's page. When the lookup returns no
   * user the body is left as plain text instead of failing the whole list.
   *
   * <p>NOTE(review): {@code text()} decodes HTML entities, and the result is combined with raw
   * anchor markup. If the body is later rendered as HTML, escaped markup in a tweet could become
   * live markup. Confirm how the body is rendered.
   *
   * @param user the user whose id is bound to all six query parameters
   * @return the mapped tweets, in result-set order
   * @throws DAOException if the query, the mapping or the user lookup fails
   */
  @Override
  public List<TweetOut> getTweetOutList(User user) throws DAOException {
    List<TweetOut> lstTweetOut = new ArrayList<>();
    try {
      ResultSet resultSet =
          this.executeQuery(
              SQL_SELECTTWEETOUT,
              false,
              user.getUserId(),
              user.getUserId(),
              user.getUserId(),
              user.getUserId(),
              user.getUserId(),
              user.getUserId());
      Pattern mentionPattern = Pattern.compile("@([\\w]+)");
      while (resultSet.next()) {
        EntityMapping<TweetOut> mapper = new EntityMapping<>(TweetOut.class);
        TweetOut tweet = mapper.getMapping(resultSet);
        Matcher mention = mentionPattern.matcher(tweet.getBody());

        String userName = null;
        User mentioned = null;
        if (mention.find()) {
          userName = mention.group(1);
          mentioned = new DAOUser(DAOFactory.getInstance()).searchByUserName(userName);
        }

        String plainBody = Jsoup.parse(tweet.getBody()).text();
        if (mentioned != null) {
          tweet.setBody(
              plainBody.replace(
                  "@" + userName,
                  "<a class=\"\" href=\"/User?id="
                      + mentioned.getUserId()
                      + "\"><strong class=\"center-middle-txt\">@"
                      + userName
                      + "</strong></a>"));
        } else {
          // No mention, or the mentioned name does not resolve to a user: keep plain text.
          tweet.setBody(plainBody);
        }
        lstTweetOut.add(tweet);
      }
    } catch (Exception e) {
      // Single wrap; a nested try/catch here would wrap a DAOException inside another one.
      throw new DAOException(e);
    } finally {
      this.CloseConnection();
    }
    return lstTweetOut;
  }
Beispiel #9
0
  @Test
  public void testByTag() {
    // Tag selectors ignore case: "DIV" matches both <div> and <DIV>.
    String mixedCase = "<div id=1><div id=2><p>Hello</p></div></div><DIV id=3>";
    Elements divs = Jsoup.parse(mixedCase).select("DIV");
    assertEquals(3, divs.size());
    assertEquals("1", divs.get(0).id());
    assertEquals("2", divs.get(1).id());
    assertEquals("3", divs.get(2).id());

    // A tag that does not occur in the markup selects nothing.
    String lowerCase = "<div id=1><div id=2><p>Hello</p></div></div><div id=3>";
    Elements spans = Jsoup.parse(lowerCase).select("span");
    assertEquals(0, spans.size());
  }
Beispiel #10
0
  @Test
  public void testGetElementById() {
    Document referenceDoc = Jsoup.parse(reference);
    Element found = referenceDoc.getElementById("div1");
    assertEquals("div1", found.id());
    assertNull(referenceDoc.getElementById("none"));

    // Two elements share id 2: looked up from the document, the div is returned, not the span.
    Document duplicateIds =
        Jsoup.parse("<div id=1><div id=2><p>Hello <span id=2>world!</span></p></div></div>");
    Element outer = duplicateIds.getElementById("2");
    assertEquals("div", outer.tagName());

    // Looked up from the <p> child instead, the span is returned.
    Element inner = outer.child(0).getElementById("2");
    assertEquals("span", inner.tagName());
  }
Beispiel #11
0
  @Test
  public void testByClass() {
    // Tag and class matching ignore case: "P.One" matches class 'ONE two' and class 'one'.
    String paragraphs = "<p id=0 class='ONE two'><p id=1 class='one'><p id=2 class='two'>";
    Elements ones = Jsoup.parse(paragraphs).select("P.One");
    assertEquals(2, ones.size());
    assertEquals("0", ones.get(0).id());
    assertEquals("1", ones.get(1).id());

    // A class that is not present selects nothing.
    Elements missing = Jsoup.parse("<div class='one'></div>").select(".foo");
    assertEquals(0, missing.size());

    // Hyphenated class names are matched case-insensitively as well.
    Elements hyphenated = Jsoup.parse("<div class='One-Two'></div>").select(".one-two");
    assertEquals(1, hyphenated.size());
  }
Beispiel #12
0
  /**
   * Parses {@code pcont} as HTML, removes a fixed set of elements and returns the remaining
   * document as HTML.
   *
   * @param pcont HTML content; {@code null} yields the empty string
   * @return the document HTML after removal
   */
  public String reviseContForLieyunwang(String pcont) {
    if (pcont == null) {
      return "";
    }

    Document doc = Jsoup.parse(pcont);
    // Selectors are applied one after another, each against the already-reduced document.
    String[] unwanted = {
      "div#share-box", "div[id^=BAIDU]", "iframe[id^=360_HOT]", "div.n_article", "div#comment-box"
    };
    for (String selector : unwanted) {
      for (Element match : doc.select(selector)) {
        match.remove();
      }
    }

    return doc.html();
  }
Beispiel #13
0
  /**
   * Parses {@code pcont} as HTML, removes a fixed set of elements and returns the remaining
   * document as HTML.
   *
   * @param pcont HTML content; {@code null} yields the empty string
   * @return the document HTML after removal
   */
  public String reviseContForTieba(String pcont) {
    if (pcont == null) {
      return "";
    }

    Document doc = Jsoup.parse(pcont);
    // Selectors are applied one after another, each against the already-reduced document.
    String[] unwanted = {
      "div.BAIDU_CLB_AD",
      "ul.p_mtail",
      "ul.p_props_tail",
      "div.thread_recommend",
      "div.j_lzl_container"
    };
    for (String selector : unwanted) {
      for (Element match : doc.select(selector)) {
        match.remove();
      }
    }
    return doc.html();
  }
Beispiel #14
0
  @Test
  public void designIsSerializedWithCorrectPrefixesAndPackageNames() throws IOException {
    ByteArrayOutputStream out = serializeDesign(ctx);

    // Expected prefix / package-name pairs, in the order their meta tags appear in <head>.
    String[] expectedPrefixes = {"my"};
    String[] expectedPackageNames = {"com.addon.mypackage"};
    int matched = 0;

    Document doc = Jsoup.parse(out.toString("UTF-8"));
    for (Node child : doc.head().childNodes()) {
      if (!"meta".equals(child.nodeName())) {
        continue;
      }
      if (!"package-mapping".equals(child.attributes().get("name"))) {
        continue;
      }
      // The content attribute is split at ':' into prefix and package name.
      String[] parts = child.attributes().get("content").split(":");
      assertEquals("Unexpected prefix.", expectedPrefixes[matched], parts[0]);
      assertEquals("Unexpected package name.", expectedPackageNames[matched], parts[1]);
      matched++;
    }
    assertEquals("Unexpected number of prefix - package name pairs.", 1, matched);
  }
Beispiel #15
0
  /**
   * Fetches the page at {@code url} and fills a new {@link ProductInfo} from it.
   *
   * <p>The fetched document is stored in the shared {@code doc} field, after which the serial
   * number, current price, product name and product picture setters are invoked.
   *
   * <p>NOTE(review): when the fetch fails the error is only logged and {@code doc} keeps
   * whatever value it had before; the setters below still run. Confirm this is intended.
   *
   * @param url address of the product page
   * @return the populated product info
   */
  public static ProductInfo parse(String url) {
    ProductInfo productInfo = new ProductInfo();

    String startMessage = "开始解析" + category.message() + "[URL=" + url + "]";
    PrintLogTool.info(startMessage, logger);

    // Product category
    productInfo.setCategory(category);

    try {
      doc = Jsoup.parse(new URL(url), 20000);
    } catch (IOException e) {
      // MalformedURLException is an IOException, so this single handler covers both failures.
      logger.error("", e);
    }

    // Serial number
    setSerialNo(productInfo, url);
    // Current price
    setCurrentPrice(productInfo);
    // Product name
    setProductName(productInfo);
    // Product picture
    setProductPic(productInfo);

    return productInfo;
  }
Beispiel #16
0
  /**
   * Loads every draw from the results HTML file into {@code concursos}.
   *
   * <p>Rows containing a {@code th} are skipped. For every other row, cell 0 is the draw code,
   * cells 2 to 7 are the six numbers and cell 15 equal to "SIM" marks the draw as accumulated.
   *
   * <p>NOTE(review): the input path is hard-coded to one machine's download folder.
   *
   * @throws IOException if the file cannot be read
   */
  private BancoMegaSena() throws IOException {
    this.concursos = new ArrayList<Concurso>();

    File input = new File("C:\\Users\\Rodrigo Lacerda\\Downloads\\D_mgsasc (1)\\d_megasc.htm");
    Elements rows = Jsoup.parse(input, "UTF-8").getElementsByTag("tr");
    System.out.println(rows.get(1).getElementsByTag("th"));

    for (Element row : rows) {
      if (!row.getElementsByTag("th").isEmpty()) {
        continue;
      }

      // Read all cell texts first, then parse, so failures surface in the same order as before.
      Elements cells = row.getElementsByTag("td");
      String codigo = cells.get(0).text();
      String[] dezenas = new String[6];
      for (int k = 0; k < dezenas.length; k++) {
        dezenas[k] = cells.get(k + 2).text();
      }
      boolean acumulado = cells.get(15).text().equals("SIM");

      Concurso concurso = new Concurso(Integer.parseInt(codigo));
      for (String dezena : dezenas) {
        concurso.addNumero(Integer.parseInt(dezena));
      }
      concurso.setAcumulado(acumulado);

      this.concursos.add(concurso);
    }
  }
 /**
  * Imports bookmarks from an uploaded HTML file for the logged-in user.
  *
  * <p>Every {@code a} element that carries an {@code add_date} attribute becomes a
  * {@code BookmarkBean} (title, url, user id, update date) and is passed to
  * {@code BookmarkUtils.applicationModule.doUpdate}. The {@code add_date} value is multiplied by
  * 1000 before being turned into a {@link Date}. Nothing happens when the login user has no id.
  *
  * @param compParameter supplies the login user and the request's character encoding
  * @param multipartFile the uploaded file
  * @param json not used by this method
  */
 @Override
 public void upload(
     ComponentParameter compParameter,
     IMultipartFile multipartFile,
     HashMap<String, Object> json) {
   try {
     ID id = ItSiteUtil.getLoginUser(compParameter).getId();
     if (id != null) {
       final Document document;
       // Close the upload stream once it has been parsed, even if parsing fails.
       try (java.io.InputStream in = multipartFile.getInputStream()) {
         document = Jsoup.parse(in, compParameter.request.getCharacterEncoding(), "");
       }
       final Elements as = document.getElementsByTag("a");
       for (final Element a : as) {
         if (a.hasAttr("add_date")) {
           final BookmarkBean bean = new BookmarkBean();
           final long t = ConvertUtils.toLong(a.attr("add_date"), 0) * 1000;
           bean.setTitle(a.text());
           bean.setUrl(a.attr("href"));
           bean.setUserId(id);
           bean.setUpdateDate(new Date(t));
           try {
             BookmarkUtils.applicationModule.doUpdate(bean);
           } catch (Exception ignored) {
             // Best effort: a bookmark that cannot be saved is skipped so the import continues.
           }
         }
       }
     }
   } catch (final Exception e) {
     // NOTE(review): every failure is reported with the same message and the cause is dropped.
     throw DataObjectException.wrapException("没有权限");
   }
 }
  /**
   * Extracts an article from {@code html}, stores it through {@code dbRobot} and increments
   * {@code cur_count}.
   *
   * <p>The intro is the text of the first {@code .text p} element, or a fixed placeholder when
   * there is none, cut to at most 50 characters and always followed by "...".
   *
   * <p>NOTE(review): the elements with ids activity-name, post-date and essay-body are
   * dereferenced without a null check.
   *
   * @param html the article page
   * @return always {@code null}
   */
  @Override
  public Object parseHtml2Obj(String html) {
    Document doc = Jsoup.parse(html);
    Element title = doc.getElementById("activity-name");
    Element createtime = doc.getElementById("post-date");
    Element content = doc.getElementById("essay-body");
    Elements pictures = doc.select("#media img");
    Elements introParagraphs = doc.select(".text p");

    String intro = introParagraphs.isEmpty() ? "阅读全部" : introParagraphs.first().text();
    String shortIntro = intro.substring(0, Math.min(50, intro.length())) + "...";

    ArticleObj article = new ArticleObj();
    article.setFrom(account_desc);
    article.setContent(content.html());
    article.setCreatetime(createtime.text());
    article.setTitle(title.text());
    article.setIntro(shortIntro);
    if (!pictures.isEmpty()) {
      // Only the first image is used as the article picture.
      article.setPic(getSrc(pictures.get(0).attr("src")));
    }
    System.err.println(article.getPic());
    dbRobot.AddArticleData(article);
    cur_count++;
    return null;
  }
 /**
  * Extracts the links of the page at the given address.
  *
  * <p>An href starting with {@code HTTP} is kept as is; an href starting with "/" is prefixed
  * with {@code HTTP} and the page's host; any other href is appended to {@code str}.
  *
  * <p>NOTE(review): this hand-rolled resolution ignores the port and the scheme of {@code str}.
  *
  * @param str address of the page to fetch
  * @return the collected links; empty when the address is malformed or the fetch fails
  */
 public static List<String> getUrlsByPage(String str) {
   List<String> urls = new ArrayList<String>();
   try {
     URL url = new URL(str);
     String host = url.getHost();
     Document doc = Jsoup.parse(url, 30000);
     for (Element link : doc.select("a")) {
       String href = link.attr("href");
       if (href.startsWith(HTTP)) {
         urls.add(href);
       } else if (href.startsWith("/")) {
         urls.add(HTTP + host + href);
       } else {
         urls.add(str + href);
       }
     }
   } catch (IOException e) {
     // MalformedURLException is an IOException, so this single handler covers both failures.
     e.printStackTrace();
   }
   return urls;
 }
Beispiel #20
0
  /**
   * Rewrites the images of {@code pcont} and inserts cover images found in scripts.
   *
   * <p>For every {@code img}: the attributes data-s, data-src and data-w are removed, max-width
   * is set to "640", and, when the image has a data-src, src becomes that value up to and
   * including its last "/" followed by "640". An image without data-src keeps its existing src
   * instead of having it replaced by the bare string "640".
   *
   * <p>For every {@code script} whose content contains {@code var cover = "..."}, an
   * {@code img} with that value as src is inserted before the first {@code div} of the document.
   *
   * @param pcont HTML content; {@code null} yields the empty string
   * @return the document HTML after rewriting
   */
  public String reviseImgForWX(String pcont) {
    if (pcont == null) {
      return "";
    }

    Document doc = Jsoup.parse(pcont);
    for (Element img : doc.select("img")) {
      boolean hasDataSrc = img.hasAttr("data-src");
      String dataSrc = img.attr("data-src");
      img.removeAttr("data-s");
      img.removeAttr("data-src");
      img.removeAttr("data-w");
      if (hasDataSrc) {
        String prefix = dataSrc.substring(0, dataSrc.lastIndexOf("/") + 1);
        img.attr("src", prefix + "640");
      }
      img.attr("max-width", "640");
    }

    Elements scripts = doc.select("script");
    Elements divs = doc.select("div");
    if (!divs.isEmpty()) {
      // Compile once for all scripts rather than once per script.
      Pattern coverPattern = Pattern.compile("(?<=(var\\scover\\s=\\s\"))\\S+(?=\")");
      for (Element script : scripts) {
        Matcher cover = coverPattern.matcher(script.html());
        if (cover.find()) {
          divs.get(0).before("<img src=\"" + cover.group() + "\"/>");
        }
      }
    }
    return doc.html();
  }
Beispiel #21
0
 public Item(
     String source,
     String link,
     String title,
     String description,
     Date datePublication,
     List enclosure) {
   try {
     this.uri = link;
     values = new ArrayList<Prop>();
     values.add(new Prop(RSS.link, link, true, false));
     values.add(new Prop(RSS.title, title));
     values.add(new Prop(RSS.description, Jsoup.parse(description).text()));
     if (datePublication != null) {
       values.add(new Prop(RSS.getURI() + "pubDate", datePublication.toString()));
       values.add(
           new Prop(RSS.getURI() + "pubDateTime", Long.toString(datePublication.getTime())));
     }
     for (Object o : enclosure) {
       SyndEnclosureImpl e = (SyndEnclosureImpl) o;
       values.add(new Prop(RSS.image, e.getUrl(), true, false));
     }
     values.add(new Prop("http://purl.org/rss/1.0/source", source, false, false));
   } catch (NullPointerException e) {
     logger.error(e);
   }
 }
  @Test
  public void testTextareaElementWithNoAttributes() throws Exception {
    TemplateFunctions templateFunctions =
        new TemplateFunctions(applicationContext, viewResolver, localeResolver);

    // A textarea with value, name, label, rows and columns but no extra attributes.
    TextareaElement textarea = new TextareaElement();
    textarea.setValue("Test");
    textarea.setName("string");
    textarea.setLabel("String");
    textarea.setRows(20);
    textarea.setColumns(80);

    String rendered = templateFunctions.render(new MockHttpServletRequest(), textarea);
    System.out.println(rendered);
    Document html = Jsoup.parse(rendered);

    assertEquals("Attributes has content", "", textarea.getAttributesAsString());
    assertEquals(
        "Name does not match", "string[0].value", html.select("textarea").first().attr("name"));
    assertEquals("String Value does not match", "Test", html.select("textarea").first().text());
    // Rows and columns are rendered as the rows and cols attributes.
    assertEquals(
        "Textarea element rows is not 20", "20", html.select("textarea").first().attr("rows"));
    assertEquals(
        "Textarea element columns is not 80", "80", html.select("textarea").first().attr("cols"));
  }
Beispiel #23
0
 /**
  * Returns the text of the last {@code title} element of {@code html}, or the empty string when
  * there is none.
  *
  * <p>Once a title containing both "moved" and "permanently" (case-insensitive) has been seen,
  * {@code getMovedUrl} is called for each following {@code body} element and once more for the
  * whole document; its results are discarded here.
  *
  * @param html the page markup
  * @return the title text
  */
 private static String makeModular(String html) {
   String text = "";
   boolean moved = false;
   Document doc = Jsoup.parse(html);
   for (Element el : doc.getAllElements()) {
     String tag = el.nodeName();
     if ("title".equals(tag)) {
       text = el.text();
       String lowered = text.toLowerCase();
       if (lowered.contains("moved") && lowered.contains("permanently")) {
         moved = true;
       }
     } else if ("body".equals(tag) && moved) {
       getMovedUrl(el);
     }
   }
   if (moved) {
     getMovedUrl(doc);
   }
   return text;
 }
Beispiel #24
0
  /**
   * Reads per-day prices from the HTML file at {@code path}.
   *
   * <p>Every {@code div} whose text starts with a digit and contains "€ " is split at spaces:
   * the first token is the 1-based day and the third token, with dots removed, is the price.
   * Any exception is printed and stops the scan; a message is printed when no div matched.
   *
   * <p>Original author's note: completely useless.
   *
   * @param path location of the HTML file
   * @return 31 prices indexed by day minus one; days without a match stay 0
   */
  public static int[] getPrice(String path) {
    int[] month = new int[31];
    int count = 0;
    try {
      Document doc = Jsoup.parse(new File(path), "UTF-8", "http://example.com/");
      for (Element div : doc.getElementsByTag("div")) {
        String text = div.text();
        if (text.length() == 0) {
          continue;
        }
        if (!Character.isDigit(text.charAt(0)) || !text.contains("€ ")) {
          continue;
        }
        count++;
        String[] tokens = text.split(" ");
        int day = Integer.parseInt(tokens[0]);
        int price = Integer.parseInt(tokens[2].replace(".", ""));
        month[day - 1] = price;
      }
    } catch (Exception e) {
      System.out.println(e);
    }
    if (count == 0) {
      System.out.println("Non e' stato scaricato il file");
    }

    return month;
  }
  @Test
  public void testTextareaElementWithValue() throws Exception {
    TemplateFunctions templateFunctions =
        new TemplateFunctions(applicationContext, viewResolver, localeResolver);

    // The explicit value is set last and is expected to win over the default value.
    TextareaElement textarea = new TextareaElement();
    textarea.setHtmlId("string");
    textarea.setDefaultValue("Default Value");
    textarea.setName("string");
    textarea.setLabel("String");
    textarea.setValue("not default");

    String rendered = templateFunctions.render(new MockHttpServletRequest(), textarea);
    Document html = Jsoup.parse(rendered);

    assertEquals(
        "Name does not match", "string[0].value", html.getElementById("string").attr("name"));
    assertEquals(
        "Textarea Value does not match", "not default", html.getElementById("string").text());
    // The label points at the textarea through its "for" attribute.
    assertEquals(
        "Label for attribute does not match element id",
        textarea.getHtmlId(),
        html.select("label").first().attr("for"));
    assertEquals("Label value does not match", "String", html.select("label").first().text());
  }
  /**
   * Called when a page has been fetched and is ready to be processed.
   *
   * <p>Only pages whose URL contains "/ads/" or "/view/" and whose parse data is HTML are
   * handled. For those, the document title, the text of the second {@code nameDiv} element (the
   * date) and the text of the first {@code addsTexts} element (the body) are printed and handed
   * to {@code Indexer.add} together with the page URL.
   */
  @Override
  public void visit(Page page) {
    String address = page.getWebURL().toString();
    if (!address.contains("/ads/") && !address.contains("/view/")) {
      return;
    }
    if (!(page.getParseData() instanceof HtmlParseData)) {
      return;
    }

    HtmlParseData htmlParseData = (HtmlParseData) page.getParseData();
    Document doc = Jsoup.parse(htmlParseData.getHtml());

    String url = page.getWebURL().getURL();
    String title = doc.title();
    String date = doc.getElementsByClass("nameDiv").get(1).text();
    String body = doc.getElementsByClass("addsTexts").get(0).text();

    System.out.println(date);
    System.out.println(body);
    Indexer.add(url, title, body, date);
  }
Beispiel #27
0
  /**
   * Renders the latest log entries into the {@code logs.html} template.
   *
   * <p>One table row per entry is appended to the element with id "logs"; the row carries the
   * entry type as CSS class and has cells for type, date and message. If something fails after
   * the template was parsed, the document is returned as far as it was built.
   *
   * @return the page HTML, or {@code null} when the template could not be parsed
   */
  @GET
  @Path("/logs")
  @Produces({MediaType.TEXT_HTML})
  public String logs() {
    Document page = null;
    try {
      ArrayList<Logger.Log> entries = Logger.getInstance().getLastLogs();

      String templatePath = getClass().getClassLoader().getResource("logs.html").getFile();
      page = Jsoup.parse(new File(templatePath), "UTF-8");

      Element tbody = page.getElementById("logs");
      for (Logger.Log entry : entries) {
        Element row = tbody.appendElement("tr").addClass(entry.getType_log());

        row.appendElement("td").addClass("type").text(entry.getType_log());
        row.appendElement("td").addClass("date").text(entry.getDate().toString());
        row.appendElement("td").addClass("message").text(entry.getMessage());
      }
    } catch (Exception e) {
      e.printStackTrace();
    }

    return page != null ? page.html() : null;
  }
 /**
  * Parses {@code inputContent} as HTML and returns the trimmed inner HTML of the elements
  * selected by {@code cssSelector}.
  *
  * @param inputContent the HTML to search
  * @return the trimmed HTML of the selection, or {@code null} when nothing is selected
  * @throws Exception if {@code validate()} or the parsing fails
  */
 @Override
 public String fire(String inputContent) throws Exception {
   validate();
   Elements matches = Jsoup.parse(inputContent).select(cssSelector);
   if (matches == null || matches.size() == 0) {
     return null;
   }
   return matches.html().trim();
 }
 /**
  * Handles the response of the finished task and opens the matching screen or dialog.
  *
  * <p>A {@code null} response or the {@code LocationHelper.LOCATION_FAILED} marker shows a toast
  * and calls {@code onResume()}. Otherwise the response is routed by its document title
  * ("Route", "Stop") or by markers in the raw text ("Did you mean?", nearby stops); anything
  * else shows the no-results dialog.
  *
  * @param response the raw response text, or {@code null} on error
  */
 @Override
 public void onTaskFinished(String response) {
   progress.cancel();
   if (response == null) {
     Toast.makeText(this, "An error has occured.", Toast.LENGTH_LONG).show();
     onResume();
     return;
   }
   // Compare by content: == only matches when both references are the very same object.
   if (response.equals(LocationHelper.LOCATION_FAILED)) {
     Toast.makeText(this, "Unable to determine your location.", Toast.LENGTH_LONG).show();
     onResume();
     return;
   }

   String title = Jsoup.parse(response).title();
   if (title.contains("Route")) {
     RouteActivity.start(HomeActivity.this, response);
   } else if (title.contains("Stop")) {
     System.out.println(title);
   } else if (response.contains("Did you mean?")
       && (response.contains("class=\"routeList\"")
           || response.contains("class=\"ambiguousLocations\""))) {
     new DidYouMeanDialog()
         .newInstance(this, response)
         .show(getSupportFragmentManager(), "com.steelhawks.hawkscout.DID_YOU_MEAN_DIALOG");
   } else if (response.contains("<h3>Nearby Stops:</h3>")) {
     NearbyStopsActivity.start(this, response);
   } else {
     new NoResultsDialog()
         .show(getSupportFragmentManager(), "com.steelhawks.hawkscout.NO_RESULTS");
     onResume();
   }
 }
Beispiel #30
0
  /**
   * Sends a plain-text e-mail to every address in {@code destinos}.
   *
   * <p>{@code mensagem} is parsed as HTML and only its text is used as the message body. Host,
   * port, sender address, sender name and credentials come from this object's fields.
   *
   * @param destinos recipient addresses
   * @param mensagem message content, may contain HTML
   * @param titulo subject line
   * @throws EmailException if the message cannot be built or sent
   */
  public void enviarEmail(List<String> destinos, String mensagem, String titulo)
      throws EmailException {
    SimpleEmail message = new SimpleEmail();

    message.setHostName(this.host);
    // Needed when the port in use is not the default one (gmail = 465).
    message.setSmtpPort(this.porta);

    // Recipients
    for (String recipient : destinos) {
      message.addTo(recipient, "", "UTF-8");
    }
    message.setSentDate(new Date());

    // Sender address and display name
    message.setFrom(this.email, this.empresa.getNome());
    message.setSubject(titulo);

    // Body: the text content of the given message
    String plainBody = Jsoup.parse(mensagem).text();
    message.setMsg(plainBody);

    // Authenticating against the server requires both of the calls below.
    message.setTLS(true);
    message.setSSL(true);

    message.setAuthentication(this.email, this.senha);
    message.send();
  }