/**
 * Parses one category page and collects either its product links or its nested sub-categories.
 *
 * <p>When the page contains a {@code div.products-list}, the lower-cased {@code href} of every
 * {@code a.name-link} found under a {@code div.info_product} is recorded as an article URL; on
 * the first call the additional pages of the same category are collected as well. Otherwise,
 * when the page contains a {@code div.category-list}, every {@code h2.category-title} that holds
 * a link is registered as a child category of {@code subCategory}.
 *
 * @param subCategory the category whose page is being parsed; it becomes the parent of, and the
 *     map key for, every nested category found
 * @param premierAppel {@code true} on the first call for this category, in which case the
 *     pagination links are collected too
 * @param sousCategorie the parsed page
 * @return the article URLs, additional pages and nested categories found on the page
 */
private static PharmacieResultatParsingSousCategorie parserSousCategorie(
      Category subCategory, boolean premierAppel, Document sousCategorie) {
    PharmacieResultatParsingSousCategorie resultat = new PharmacieResultatParsingSousCategorie();
    Element productList = sousCategorie.select("div.products-list").first();
    if (productList != null) {
      // select() returns an empty collection rather than null, so no null check is needed.
      for (Element productInfo : productList.select("div.info_product")) {
        Element productInfoLink = productInfo.select("a.name-link").first();
        if (productInfoLink != null) {
          String productInfoLinkUrl = productInfoLink.attributes().get("href").toLowerCase();
          resultat.getListeUrlArticlesTrouves().add(productInfoLinkUrl);
        }
      }
      if (premierAppel) {
        resultat
            .getListeAutrePagesAParserMemeCategorie()
            .addAll(verificationAutrePageAParser(sousCategorie));
      }
      return resultat;
    }

    Element categoryList = sousCategorie.select("div.category-list").first();
    if (categoryList == null) {
      return resultat;
    }

    for (Element nouvelleSousCategorie : categoryList.select("h2.category-title")) {
      Element lienSousCategorie = nouvelleSousCategorie.select("a").first();
      if (lienSousCategorie == null) {
        // A title without a link cannot be followed; skip it instead of failing on null.
        continue;
      }
      String nouvelleSousCategorieLink = lienSousCategorie.attributes().get("href").toLowerCase();
      String nouvelleSousCategorieName = nouvelleSousCategorie.text();

      // Create the new category; every child gets its parent set, not only the first one.
      Category newCategory = new Category(nouvelleSousCategorieName, nouvelleSousCategorieLink);
      newCategory.setParentCategory(subCategory);

      if (!resultat.getMapAutresSousCategories().containsKey(subCategory)) {
        resultat.getMapAutresSousCategories().put(subCategory, new ArrayList<Category>());
      }
      resultat.getMapAutresSousCategories().get(subCategory).add(newCategory);
    }

    return resultat;
  }
Beispiel #2
0
 /**
  * Downloads the document at {@code CURRENCY_POINT} and updates the rates of every currency
  * listed for Oshadbank.
  *
  * <p>For each {@code c} element under the bank's {@code currencies} element, the {@link
  * Currency} named by the element id receives its buy rate from the {@code ar} attribute and its
  * sell rate from the {@code br} attribute. If the download fails, or the expected elements are
  * missing from the document, the problem is logged and no rates are changed.
  */
 // @PostConstruct
 public void init() {
   try {
     Document doc = Jsoup.connect(CURRENCY_POINT).get();
     Element oshadBankContainer = doc.select("#7oiylpmiow8iy1sma9a").first(); // Oshadbank id
     if (oshadBankContainer == null) {
       // first() yields null when nothing matches; avoid a NullPointerException below.
       LOGGER.error("Oshadbank container was not found in the currency document");
       return;
     }
     Element currenciesContainer = oshadBankContainer.getElementsByTag("currencies").first();
     if (currenciesContainer == null) {
       LOGGER.error("Oshadbank container has no currencies element");
       return;
     }
     for (Element currencyContainer : currenciesContainer.getElementsByTag("c")) {
       Currency currency = Currency.valueOf(currencyContainer.id());
       currency.setBuyRate(new BigDecimal(currencyContainer.attributes().get("ar")));
       currency.setSellRate(new BigDecimal(currencyContainer.attributes().get("br")));
       LOGGER.info(currency.string() + " was inited");
     }
   } catch (IOException e) {
     LOGGER.error(e);
   }
 }
Beispiel #3
0
  /**
   * Checks that {@code Element.dataset()} is a live view over the element's {@code data-*}
   * attributes: changes made through the map show up in the attributes and vice versa.
   */
  @Test
  public void dataset() {
    Document doc =
        Jsoup.parse(
            "<div id=1 data-name=jsoup class=new data-package=jar>Hello</div><p id=2>Hello</p>");
    Element div = doc.select("div").first();
    Map<String, String> dataset = div.dataset();
    Attributes attributes = div.attributes();

    // size, get, set, add, remove
    // Only the two data-* attributes are exposed, keyed without the "data-" prefix.
    assertEquals(2, dataset.size());
    assertEquals("jsoup", dataset.get("name"));
    assertEquals("jar", dataset.get("package"));

    // Mutate through the dataset view: one update, one addition, one removal.
    dataset.put("name", "jsoup updated");
    dataset.put("language", "java");
    dataset.remove("package");

    // The element still has four attributes overall (id, class, data-name, data-language).
    assertEquals(2, dataset.size());
    assertEquals(4, attributes.size());
    assertEquals("jsoup updated", attributes.get("data-name"));
    assertEquals("jsoup updated", dataset.get("name"));
    assertEquals("java", attributes.get("data-language"));
    assertEquals("java", dataset.get("language"));

    // Mutate through the attributes: the new data-* attribute appears in the dataset.
    attributes.put("data-food", "bacon");
    assertEquals(3, dataset.size());
    assertEquals("bacon", dataset.get("food"));

    attributes.put("data-", "empty");
    assertEquals(null, dataset.get("")); // data- is not a data attribute

    // An element without any data-* attribute has an empty dataset.
    Element p = doc.select("p").first();
    assertEquals(0, p.dataset().size());
  }
Beispiel #4
0
  /**
   * Creates an item from a design element and adds it to the data source, then does the same for
   * every child element, attaching each child item to the item created here.
   *
   * @since 7.5.0
   * @param node the design element describing the item; its tag name must be {@code node}
   * @param selected accumulator for selected items; the item id is added to it when the element
   *     carries a {@code selected} attribute
   * @param context the DesignContext instance used in parsing
   * @return the item id of the new item, taken from the element's {@code text} attribute
   * @throws DesignException if the tag name of the {@code node} element is not {@code node}.
   */
  @Override
  protected String readItem(Element node, Set<String> selected, DesignContext context) {
    final String tagName = node.tagName();
    final boolean isTreeNode = "node".equals(tagName);
    if (!isTreeNode) {
      throw new DesignException(
          "Unrecognized child element in " + getClass().getSimpleName() + ": " + tagName);
    }

    final String newItemId = node.attr("text");
    addItem(newItemId);

    if (node.hasAttr("icon")) {
      setItemIcon(
          newItemId,
          DesignAttributeHandler.readAttribute("icon", node.attributes(), Resource.class));
    }

    if (node.hasAttr("selected")) {
      selected.add(newItemId);
    }

    // Depth-first: each child is fully read before being attached to this item.
    for (Element childNode : node.children()) {
      setParent(readItem(childNode, selected, context), newItemId);
    }

    return newItemId;
  }
Beispiel #5
0
  /**
   * Removes font declarations from every inline {@code style} attribute of the given HTML.
   *
   * <p>Each {@code style} value is split on {@code ;}; pieces containing {@code font-family:} or
   * {@code font-size:} (compared case-insensitively) are dropped, and the remaining pieces are
   * written back, each followed by {@code ;}.
   *
   * @param html the HTML to process
   * @return the inner HTML of the parsed document's body after the clean-up
   */
  @SuppressLint("DefaultLocale")
  private String improveHtml(final String html) {
    final Document parsed = Jsoup.parse(html);

    for (final Element element : parsed.getAllElements()) {
      if (!element.hasAttr("style")) {
        continue;
      }

      for (final Attribute attribute : element.attributes()) {
        if (!"style".equals(attribute.getKey())) {
          continue;
        }

        final StringBuilder kept = new StringBuilder();
        for (final String declaration : attribute.getValue().trim().split(";")) {
          final String lowered = declaration.toLowerCase(Locale.ENGLISH);
          final boolean isFontRule =
              lowered.contains("font-family:") || lowered.contains("font-size:");
          if (!isFontRule) {
            kept.append(declaration).append(";");
          }
        }
        attribute.setValue(kept.toString());
      }
    }

    return parsed.body().html();
  }
  /**
   * Appends a predictable HTML rendering of the element to {@code sb}: attributes are written in
   * alphabetical order with single-quoted values, text nodes are trimmed, and a closing tag is
   * always emitted. Child elements are rendered recursively into the same builder.
   *
   * @param producedElem the element to render
   * @param sb the builder receiving the markup
   * @return the full content of {@code sb} after this element has been appended
   */
  private String elementToHtml(Element producedElem, StringBuilder sb) {
    final String tag = producedElem.tagName();

    ArrayList<String> sortedKeys = new ArrayList<String>();
    for (Attribute attribute : producedElem.attributes().asList()) {
      sortedKeys.add(attribute.getKey());
    }
    Collections.sort(sortedKeys);

    sb.append("<").append(tag);
    for (String key : sortedKeys) {
      sb.append(" " + key + "='" + producedElem.attr(key) + "'");
    }
    sb.append(">");

    for (Node child : producedElem.childNodes()) {
      if (child instanceof TextNode) {
        sb.append(((TextNode) child).text().trim());
      } else if (child instanceof Element) {
        elementToHtml((Element) child, sb);
      }
    }

    sb.append("</" + tag + ">");
    return sb.toString();
  }
Beispiel #7
0
  /**
   * Writes a data source item to a design as a {@code node} element and then writes each of its
   * children, recursively, inside that element.
   *
   * @since 7.5.0
   * @param design the element into which to insert the item
   * @param itemId the id of the item to write
   * @param context the DesignContext instance used in writing
   * @return the {@code node} element that was appended to {@code design} for this item
   */
  @Override
  protected Element writeItem(Element design, Object itemId, DesignContext context) {
    final Element itemElement = design.appendElement("node");
    itemElement.attr("text", itemId.toString());

    final Resource itemIcon = getItemIcon(itemId);
    if (itemIcon != null) {
      DesignAttributeHandler.writeAttribute(
          "icon", itemElement.attributes(), itemIcon, null, Resource.class);
    }

    if (isSelected(itemId)) {
      itemElement.attr("selected", "");
    }

    final Collection<?> childIds = getChildren(itemId);
    if (childIds == null) {
      // Yeah... see #5864
      return itemElement;
    }

    for (Object childId : childIds) {
      writeItem(itemElement, childId, context);
    }
    return itemElement;
  }
Beispiel #8
0
  /**
   * Builds an item from the html element div#feed_item.
   *
   * <p>The id comes from the root element, the thumbnail from the image inside the first link,
   * and the title from the link inside the info block. When the link has an {@code onmouseover}
   * attribute, the full-size uri is cut out of it (from two characters past the first comma up to
   * the next single quote) and the item is flagged as a gif; otherwise the thumbnail uri is used.
   *
   * @param root the div#feed_item element
   */
  public MeipinItem(Element root) {
    id = root.attr("id");

    Element itemInner = root.child(0); // div#item_inner
    Element imageLink = itemInner.child(0).child(0); // div#item_img > a

    this.thumbnailUri = imageLink.child(0).attr("src"); // img

    if (imageLink.attributes().hasKey("onmouseover")) {
      String handler = imageLink.attr("onmouseover");
      int from = handler.indexOf(",") + 2;
      int to = handler.indexOf("'", from);
      this.uri = handler.substring(from, to);
      this.isGif = true;
    } else {
      this.uri = this.thumbnailUri;
    }

    if (this.uri.contains(".gif")) {
      this.isGif = true;
    }

    Element itemInfo = itemInner.child(1); // div#item_info
    Element titleLink = itemInfo.child(1).child(0); // h3 > a
    this.title = titleLink.attr("title");
  }
  /**
   * Prefixes the {@code src} of every {@code img} in the given HTML with {@code
   * http://qdaily.com/}.
   *
   * @param pcont the HTML content, may be {@code null}
   * @return the HTML of the parsed document after rewriting, or an empty string when {@code
   *     pcont} is {@code null}
   */
  public String reviseImgForQdaily(String pcont) {
    if (pcont == null) {
      return "";
    }

    Document parsed = Jsoup.parse(pcont);
    for (Element image : parsed.select("img")) {
      String originalSrc = image.attributes().get("src");
      image.attr("src", "http://qdaily.com/" + originalSrc);
    }
    return parsed.html();
  }
Beispiel #10
0
  /**
   * Replaces the {@code src} of every {@code img} in the given HTML with the value of its {@code
   * data-original} attribute.
   *
   * <p>Images without a {@code data-original} value keep their existing {@code src}; overwriting
   * it unconditionally would blank the source of those images.
   *
   * @param pcont the HTML content, may be {@code null}
   * @return the HTML of the parsed document after rewriting, or an empty string when {@code
   *     pcont} is {@code null}
   */
  public String reviseImgForIxiqi(String pcont) {
    if (pcont == null) return "";

    Document doc = Jsoup.parse(pcont);
    Elements eleimages = doc.select("img");
    for (Element img : eleimages) {
      Attributes attrs = img.attributes();
      // get() returns an empty string for a missing attribute, so this covers absent and blank.
      String source = attrs.get("data-original");
      if (!source.isEmpty()) {
        img.attr("src", source);
      }
    }
    return doc.html();
  }
 /**
  * Tells whether an element counts as visible, judged only by its ARIA role: an element whose
  * role equals {@code ARIA_PRESENTATION} is treated as presentation-only.
  *
  * @param element the element to inspect
  * @return true if the element is visible rather than just presentation.
  * @todo(dallison) check other aria roles for visible intentions
  */
 static boolean isVisible(Element element) {
   Attributes attrs = element.attributes();
   // NOTE(review): presence is tested with the literal "role" while the value is read through
   // ARIA_ROLE; presumably both name the same attribute. Confirm against the constant.
   boolean presentationOnly =
       attrs.hasKey("role") && attrs.get(ARIA_ROLE).equals(ARIA_PRESENTATION);
   return !presentationOnly;
 }
Beispiel #12
0
  /**
   * Materialises images stored in hidden inputs: for every {@code input} named {@code hiddenimg},
   * an {@code img} whose {@code src} is the input's {@code value} is inserted before the input's
   * parent element.
   *
   * @param pcont the HTML content, may be {@code null}
   * @return the HTML of the parsed document after rewriting, or an empty string when {@code
   *     pcont} is {@code null}
   */
  public String reviseImgForYuehui(String pcont) {
    if (pcont == null) {
      return "";
    }

    Document parsed = Jsoup.parse(pcont);
    // Iterating an empty selection is a no-op, so no explicit size check is required.
    for (Element hiddenInput : parsed.select("input[name=\"hiddenimg\"]")) {
      String imageUrl = hiddenInput.attributes().get("value");
      hiddenInput.parent().before("<img src=\"" + imageUrl + "\" />");
    }
    return parsed.html();
  }
Beispiel #13
0
  /**
   * Copies the {@code data-src} attribute of every {@code img} that has one into its {@code src}
   * attribute; images without {@code data-src} are left unchanged.
   *
   * @param pcont the HTML content, may be {@code null}
   * @return the HTML of the parsed document after rewriting, or an empty string when {@code
   *     pcont} is {@code null}
   */
  public String reviseImgForSohuNews(String pcont) {
    if (pcont == null) {
      return "";
    }

    Document parsed = Jsoup.parse(pcont);
    for (Element image : parsed.select("img")) {
      Attributes imageAttrs = image.attributes();
      if (!imageAttrs.hasKey("data-src")) {
        continue;
      }
      image.attr("src", imageAttrs.get("data-src"));
    }
    return parsed.html();
  }
  /**
   * Renders a node back to markup while dropping attributes whose name starts with {@code _}.
   *
   * <p>Elements are written tag by tag with their children rendered recursively; an element with
   * no child nodes whose tag reports itself as empty is written in self-closing form. Text nodes
   * yield their whole text, comments yield nothing, XML declarations without child nodes yield
   * nothing, and data nodes without child nodes yield their {@code data} attribute. Anything else
   * falls back to the node's own outer HTML.
   *
   * @param node the node to render
   * @return the markup for the node, possibly empty
   */
  private static String cleanHtml(final Node node) {
    if (node instanceof Element) {
      final Element el = (Element) node;
      final String tagName = el.tagName();
      final StringBuilder out = new StringBuilder();
      out.append("<").append(tagName);

      for (Attribute attr : el.attributes()) {
        final String key = attr.getKey();
        if (key.startsWith("_")) {
          continue;
        }
        out.append(" ").append(key).append("=\"").append(attr.getValue()).append('"');
      }

      if (el.childNodes().isEmpty() && el.tag().isEmpty()) {
        return out.append(" />").toString();
      }

      out.append(">");
      for (Node child : el.childNodes()) {
        out.append(cleanHtml(child));
      }
      return out.append("</").append(tagName).append(">").toString();
    }

    if (node instanceof TextNode) {
      return ((TextNode) node).getWholeText();
    }

    if (node instanceof XmlDeclaration) {
      // HACK
      return node.childNodes().isEmpty() ? "" : node.outerHtml();
    }

    if (node instanceof Comment) {
      // HACK: elide comments for now.
      return "";
    }

    if (node instanceof DataNode && node.childNodes().isEmpty()) {
      // No child nodes are defined but we have to handle content if such exists, example
      // <script language="JavaScript">var a =  { name: "${user.name}"}</script>
      final String content = node.attr("data");
      return Strings.empty(content) ? "" : content;
    }

    return node.outerHtml();
  }
Beispiel #15
0
  /**
   * Builds a copy of the element's tag that carries only the attributes the whitelist accepts,
   * plus the attributes the whitelist enforces for that tag.
   *
   * @param sourceEl the element to copy from
   * @return the new element together with the number of source attributes that were discarded
   */
  private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes keptAttrs = new Attributes();
    final Element safeEl = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), keptAttrs);

    int discarded = 0;
    for (Attribute candidate : sourceEl.attributes()) {
      if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
        discarded++;
        continue;
      }
      keptAttrs.put(candidate);
    }

    // Enforced attributes are added last, after the filtered source attributes.
    keptAttrs.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(safeEl, discarded);
  }
 /**
  * Collects the links to the other result pages of a category from its pagination block.
  *
  * <p>Only anchors inside {@code div.results-pages} whose class name is empty are kept; their
  * {@code href} is returned lower-cased.
  *
  * @param sousCategorie the parsed category page
  * @return the lower-cased page links, empty when the page has no pagination block
  */
 private static List<String> verificationAutrePageAParser(Document sousCategorie) {
   List<String> pages = new ArrayList<String>();

   Element pagination = sousCategorie.select("div.results-pages").first();
   if (pagination == null) {
     return pages;
   }

   Elements liens = pagination.select("a:not(.previous), a:not(.next)");
   if (liens == null) {
     return pages;
   }

   for (Element lienElem : liens) {
     String cssClass = lienElem.className();
     // Anchors that carry any class are skipped; only class-less anchors are page links.
     if (cssClass == null || !cssClass.isEmpty()) {
       continue;
     }
     pages.add(lienElem.attributes().get("href").toLowerCase());
   }
   return pages;
 }
Beispiel #17
0
  /**
   * Checks {@code Element.attr(String, boolean)}: {@code true} adds a value-less boolean
   * attribute, {@code false} removes an existing attribute of that name.
   */
  @Test
  public void testAddBooleanAttribute() {
    Element div = new Element(Tag.valueOf("div"), "");

    // Adds a boolean attribute literally named "true".
    div.attr("true", true);

    // First set a regular attribute named "false", then switch it off with the boolean overload.
    div.attr("false", "value");
    div.attr("false", false);

    assertTrue(div.hasAttr("true"));
    assertEquals("", div.attr("true"));

    // Only the boolean attribute remains.
    List<Attribute> attributes = div.attributes().asList();
    assertEquals("There should be one attribute", 1, attributes.size());
    assertTrue("Attribute should be boolean", attributes.get(0) instanceof BooleanAttribute);

    assertFalse(div.hasAttr("false"));

    // A boolean attribute is serialised without a value.
    assertEquals("<div true></div>", div.outerHtml());
  }
Beispiel #18
0
  /**
   * Rewrites images in the given HTML in two passes.
   *
   * <p>First, for every {@code img} inside a {@code noscript} element, a new {@code img} with the
   * same {@code src} is inserted before the image's parent, and the {@code noscript} element is
   * then removed. Second, every {@code img} in the document gets its {@code src} replaced by
   * {@code data-original} when that is non-empty, and then by {@code data-actualsrc} when that is
   * non-empty, so {@code data-actualsrc} wins if both are set.
   *
   * @param pcont the HTML content, may be {@code null}
   * @return the HTML of the parsed document after rewriting, or an empty string when {@code
   *     pcont} is {@code null}
   */
  public String reviseImgForZhiHuApp(String pcont) {
    if (pcont == null) {
      return "";
    }

    Document parsed = Jsoup.parse(pcont);

    for (Element noscript : parsed.select("noscript")) {
      for (Element fallbackImg : noscript.getElementsByTag("img")) {
        String fallbackSrc = fallbackImg.attributes().get("src");
        fallbackImg.parent().before("<img src=\"" + fallbackSrc + "\" />");
      }
      noscript.remove();
    }

    for (Element image : parsed.select("img")) {
      String lazySrc = image.attr("data-original");
      String actualSrc = image.attr("data-actualsrc");
      if (!lazySrc.isEmpty()) {
        image.attr("src", lazySrc);
      }
      if (!actualSrc.isEmpty()) {
        image.attr("src", actualSrc);
      }
    }

    return parsed.html();
  }
Beispiel #19
0
 /**
  * Matches any element that has at least one attribute.
  *
  * @param root the root of the search; not consulted by this check
  * @param element the element being tested
  * @return {@code true} when the element's attribute count is greater than zero
  */
 @Override
 public boolean matches(Element root, Element element) {
   final int attributeCount = element.attributes().size();
   return attributeCount > 0;
 }