Ejemplo n.º 1
0
  /**
   * Collects the "similar recommendation" entries of a result page.
   *
   * <p>The direct children of the paragraphs inside {@code div.cc} are visited in document order.
   * A {@code span} with class {@code flagicon} switches the language used for the entries that
   * follow it, provided one of its remaining class names resolves to a known language. Every
   * {@code a} element is added to the result as a recommendation in the language active at that
   * point, which is {@code null} until a flag icon has been resolved.
   *
   * @param doc the parsed result page
   * @param resultBuilder receives one recommendation per link found
   */
  private void findRecommendations(
      @NotNull Document doc, @NotNull BilingualQueryResultBuilder resultBuilder) {
    Language activeLanguage = null;

    for (Element candidate : doc.select("div.cc > p > *")) {
      String tag = candidate.tagName();

      if (tag.equals("span") && candidate.hasClass("flagicon")) {
        // Any class besides the marker itself may name the language; the first match wins.
        Set<String> languageHints = candidate.classNames();
        languageHints.remove("flagicon");
        for (String hint : languageHints) {
          Language resolved = Language.getExistingLanguageById(hint);
          if (resolved != null) {
            activeLanguage = resolved;
            break;
          }
        }
        continue;
      }

      if (!tag.equals("a")) {
        continue;
      }

      String label = candidate.text();
      DictionaryObjectBuilder entry = ImmutableDictionaryObject.builder();
      entry.setLanguage(activeLanguage).setGeneralForm(label);
      resultBuilder.addSimilarRecommendation(entry.build());
    }
  }
Ejemplo n.º 2
0
 /**
  * Parses HTML markup and turns the links of its first "subarea" element into areas.
  *
  * <p>Within that element a {@code b} child supplies the letter assigned to the entries after it,
  * and each {@code a} child produces one {@link AreaVO}. The scan starts at the second child, so
  * the first child is never inspected. The pinyin is taken from the link's {@code href}, from
  * offset 7 up to its last {@code /}. The {@code index} counter declared outside this method is
  * used as the order index and incremented for every area produced.
  *
  * @param text HTML markup to parse
  * @param pid value stored as the pid of every produced area
  * @return the produced areas in document order; empty when no "subarea" element exists
  */
 public List<AreaVO> parseMessage(String text, int pid) {
   List<AreaVO> result = new ArrayList<AreaVO>();
   Elements subareas = Jsoup.parse(text).body().getElementsByClass("subarea");
   if (subareas.isEmpty()) {
     return result;
   }

   Elements entries = subareas.get(0).children();
   String currentLetter = "";
   for (int pos = 1; pos < entries.size(); pos++) {
     Element entry = entries.get(pos);
     String tag = entry.tagName();

     if ("b".equals(tag)) {
       currentLetter = entry.text();
     } else if ("a".equals(tag)) {
       AreaVO area = new AreaVO();
       area.setLetter(currentLetter);
       area.setName(entry.text());
       area.setOrderIdx(index);
       area.setPid(pid);

       String link = entry.attr("href");
       area.setPinyin(link.substring(7, link.lastIndexOf("/")));

       index++;
       System.out.println(area.toString());
       result.add(area);
     }
   }
   return result;
 }
Ejemplo n.º 3
0
 /**
  * Fetches a fixed page and prints its media sources, {@code link} imports and anchors.
  *
  * @param args not used
  * @throws IOException if the page cannot be retrieved
  */
 public static void main(String[] args) throws IOException {
   String url = "http://www.iteye.com/login";
   print("Fetching %s...", url);

   Document page = Jsoup.connect(url).get();
   Elements anchors = page.select("a[href]");
   Elements sources = page.select("[src]");
   Elements linkTags = page.select("link[href]");

   print("\nMedia: (%d)", sources.size());
   for (Element source : sources) {
     String tag = source.tagName();
     if (!tag.equals("img")) {
       print(" * %s: <%s>", tag, source.attr("abs:src"));
       continue;
     }
     // Images additionally report their dimensions and a shortened alt text.
     print(
         " * %s: <%s> %sx%s (%s)",
         tag,
         source.attr("abs:src"),
         source.attr("width"),
         source.attr("height"),
         trim(source.attr("alt"), 20));
   }

   print("\nImports: (%d)", linkTags.size());
   for (Element linkTag : linkTags) {
     print(" * %s <%s> (%s)", linkTag.tagName(), linkTag.attr("abs:href"), linkTag.attr("rel"));
   }

   print("\nLinks: (%d)", anchors.size());
   for (Element anchor : anchors) {
     print(" * a: <%s>  (%s)", anchor.attr("abs:href"), trim(anchor.text(), 35));
   }
 }
Ejemplo n.º 4
0
  /**
   * Produce predictable html (attributes in alphabetical order), always include close tags.
   *
   * <p>Attribute values are wrapped in single quotes and written as they are. Text nodes are
   * appended trimmed; child nodes that are neither elements nor text nodes are skipped.
   *
   * @param producedElem element to serialize together with its descendants
   * @param sb buffer the markup is appended to
   * @return the complete content of {@code sb} once the element has been appended
   */
  private String elementToHtml(Element producedElem, StringBuilder sb) {
    appendElementHtml(producedElem, sb);
    // Convert once here; the recursion itself never needs the intermediate string.
    return sb.toString();
  }

  /** Appends the predictable markup of {@code elem} and all of its descendants to {@code sb}. */
  private void appendElementHtml(Element elem, StringBuilder sb) {
    ArrayList<String> names = new ArrayList<String>();
    for (Attribute a : elem.attributes().asList()) {
      names.add(a.getKey());
    }
    Collections.sort(names);

    sb.append('<').append(elem.tagName());
    for (String attrName : names) {
      sb.append(' ').append(attrName).append("='").append(elem.attr(attrName)).append('\'');
    }
    sb.append('>');

    for (Node child : elem.childNodes()) {
      if (child instanceof Element) {
        appendElementHtml((Element) child, sb);
      } else if (child instanceof TextNode) {
        sb.append(((TextNode) child).text().trim());
      }
    }
    sb.append("</").append(elem.tagName()).append('>');
  }
Ejemplo n.º 5
0
  /**
   * Reads a single item from a design element, adds it to the data source and then does the same
   * for every child element, attaching each child item to this one.
   *
   * <p>The item id is the value of the element's {@code text} attribute. An {@code icon}
   * attribute is read as a {@link Resource} and set as the item icon; the presence of a
   * {@code selected} attribute adds the item id to {@code selected}.
   *
   * @since 7.5.0
   * @param node an element representing the item (tree node)
   * @param selected accumulates the ids of all items marked as selected
   * @param context the DesignContext instance used in parsing; handed on to the recursive calls
   * @return the item id of the new item
   * @throws DesignException if the tag name of the {@code node} element is not {@code node}
   */
  @Override
  protected String readItem(Element node, Set<String> selected, DesignContext context) {
    final String tag = node.tagName();
    if (!"node".equals(tag)) {
      throw new DesignException(
          "Unrecognized child element in " + getClass().getSimpleName() + ": " + tag);
    }

    final String id = node.attr("text");
    addItem(id);

    if (node.hasAttr("icon")) {
      Resource itemIcon =
          DesignAttributeHandler.readAttribute("icon", node.attributes(), Resource.class);
      setItemIcon(id, itemIcon);
    }

    if (node.hasAttr("selected")) {
      selected.add(id);
    }

    for (Element nested : node.children()) {
      // The child has to exist as an item before it can be attached to its parent.
      String nestedId = readItem(nested, selected, context);
      setParent(nestedId, id);
    }
    return id;
  }
 /**
  * Returns the node found in the parent's child-node list at the element's sibling index.
  *
  * @param aInElement element to look up; its parent is dereferenced without a null check
  * @return the node at the element's sibling index
  * @throws RuntimeException if that node's name differs from the element's tag name
  */
 private static Node toNode(Element aInElement) {
   Node lCandidate = aInElement.parent().childNode(aInElement.siblingIndex());
   String lActual = lCandidate.nodeName();
   String lExpected = aInElement.tagName();
   if (lActual.equals(lExpected)) {
     return lCandidate;
   }
   throw new RuntimeException(lActual + " != " + lExpected);
 }
Ejemplo n.º 7
0
 /** Parses a small well-formed page and checks the paragraph and the image nested inside it. */
 @Test
 public void parsesSimpleDocument() {
   Document parsed =
       Jsoup.parse(
           "<html><head><title>First!</title></head>"
               + "<body><p>First post! <img src=\"foo.png\" /></p></body></html>");

   // need a better way to verify these:
   Element paragraph = parsed.child(1).child(0);
   assertEquals("p", paragraph.tagName());

   Element image = paragraph.child(0);
   assertEquals("foo.png", image.attr("src"));
   assertEquals("img", image.tagName());
 }
Ejemplo n.º 8
0
  /**
   * Checks id lookup on the shared {@code reference} markup and on markup that uses the same id
   * twice, both from the document and from a nested element.
   */
  @Test
  public void testGetElementById() {
    Document referenceDoc = Jsoup.parse(reference);
    assertEquals("div1", referenceDoc.getElementById("div1").id());
    assertNull(referenceDoc.getElementById("none"));

    Document duplicateIds =
        Jsoup.parse("<div id=1><div id=2><p>Hello <span id=2>world!</span></p></div></div>");

    // Looked up from the document, the div is found rather than the span.
    Element outerMatch = duplicateIds.getElementById("2");
    assertEquals("div", outerMatch.tagName());

    // Looked up from the <p> inside that div, the span is found.
    Element innerMatch = outerMatch.child(0).getElementById("2");
    assertEquals("span", innerMatch.tagName());
  }
Ejemplo n.º 9
0
  /**
   * Turns a node back into markup while leaving out attributes whose key starts with an
   * underscore.
   *
   * <ul>
   *   <li>Elements are rebuilt tag by tag; an element without child nodes whose tag reports
   *       itself as empty is written self-closed, everything else gets an explicit close tag.
   *   <li>Text nodes contribute their whole text.
   *   <li>XML declarations without child nodes and all comments produce an empty string.
   *   <li>Data nodes without child nodes contribute their {@code data} attribute, or an empty
   *       string when {@code Strings.empty} reports it as empty.
   *   <li>Any other node is returned as its outer HTML.
   * </ul>
   *
   * @param node the node to serialize
   * @return the markup for {@code node}
   */
  private static String cleanHtml(final Node node) {
    if (node instanceof Element) {
      final Element el = (Element) node;
      final String tagName = el.tagName();
      final StringBuilder out = new StringBuilder();
      out.append("<").append(tagName);

      for (Attribute attr : el.attributes()) {
        final String key = attr.getKey();
        if (key.startsWith("_")) {
          continue;
        }
        out.append(" ").append(key).append("=\"").append(attr.getValue()).append('"');
      }

      if (el.childNodes().isEmpty() && el.tag().isEmpty()) {
        return out.append(" />").toString();
      }

      out.append(">");
      for (Node nested : el.childNodes()) {
        out.append(cleanHtml(nested));
      }
      return out.append("</").append(tagName).append(">").toString();
    }

    if (node instanceof TextNode) {
      return ((TextNode) node).getWholeText();
    }

    if (node instanceof XmlDeclaration) {
      // HACK
      return node.childNodes().isEmpty() ? "" : node.outerHtml();
    }

    if (node instanceof Comment) {
      // HACK: elide comments for now.
      return "";
    }

    if (node instanceof DataNode && node.childNodes().isEmpty()) {
      // No child nodes are defined but content may still exist, for example
      // <script language="JavaScript">var a =  { name: "${user.name}"}</script>
      final String data = node.attr("data");
      return Strings.empty(data) ? "" : data;
    }

    return node.outerHtml();
  }
Ejemplo n.º 10
0
  /**
   * Fetches the post page at {@code url} and extracts title, excerpt, content and date from it.
   *
   * <p>Extraction only happens when the element with id "main" holds exactly one element of class
   * "content"; otherwise a diagnostic line with the number of matches is printed and nothing is
   * extracted. Within the content element every "lightbox" element is replaced in place by the
   * nodes returned from {@code extractImageNodes}. The direct children are then scanned up to the
   * first one carrying class "clear": the first {@code h1} seen while {@code title} is unset
   * becomes the title, the first {@code p} seen while {@code excerpt} is unset supplies the
   * excerpt, and every child that was not consumed as the title is appended to the content
   * markup.
   *
   * <p>NOTE(review): a page without a "main" element or without a "date" element still fails with
   * a {@link NullPointerException}; confirm whether callers rely on that.
   *
   * @param aInConnection connection that is pointed at {@code url} and used for the request
   * @param images collection passed on to {@code collectImages}
   * @throws IOException if the request fails
   */
  public void download(Connection aInConnection, Collection<Image> images) throws IOException {
    aInConnection.url(url);
    Document lDocument = aInConnection.get();
    Element lMain = lDocument.getElementById("main");
    Elements lContents = lMain.getElementsByClass("content");

    if (lContents.size() != 1) {
      // This covers zero matches as well as several, so the actual count is reported.
      System.out.println(
          "Expected exactly one content in main section of post page "
              + toString()
              + " but found "
              + lContents.size());
      return;
    }

    StringBuilder sb = new StringBuilder();
    Element lContent = lContents.first();

    collectImages(lContent, images);

    // Swap each lightbox wrapper for the image nodes extracted from it, at the same position.
    Elements lLightboxElements = lContent.getElementsByClass("lightbox");
    for (Element lLightboxElement : lLightboxElements) {
      Collection<Node> lImageNodes = extractImageNodes(lLightboxElement);

      Element lParent = lLightboxElement.parent();
      int i = lLightboxElement.siblingIndex();
      lParent.insertChildren(i, lImageNodes);
      lLightboxElement.remove();
    }

    Elements lChildElements = lContent.children();
    for (Element lChildElement : lChildElements) {
      if (lChildElement.hasClass("clear")) {
        // no more post content
        break;
      }

      if (title == null && lChildElement.tagName().equals("h1")) {
        // the first h1 header is the title
        title = lChildElement.html();
      } else {
        if (excerpt == null && lChildElement.tagName().equals("p")) {
          excerpt = lChildElement.text();
        }
        sb.append(lChildElement.toString());
      }
    }

    content = sb.toString();

    Elements lDateElements = lContent.getElementsByClass("date");
    String lHunDate = lDateElements.first().html();
    date = new PostDate(lHunDate);
  }
Ejemplo n.º 11
0
  /**
   * Determines the pupil that is currently selected on the page.
   *
   * <p>Every {@code option} below an element with id {@code ctl00_topMenu_pupil_drdPupils} is
   * looked up through {@code Pupil.getByFormId} and inserted as a new pupil when unknown; the
   * option whose {@code selected} attribute equals "selected" determines the result. When the page
   * has no such options at all, the pupil is instead built from the first "user-name" element and
   * the {@code value} of the element with id {@code ctl00_topMenu_tbUserId}.
   *
   * @param doc the parsed page
   * @return the selected pupil, never {@code null}
   * @throws ParseException if no option is marked as selected, or if there are no options and
   *     one of the two fallback elements is missing
   */
  public static Pupil getSelectedPupil(Document doc) throws ParseException {

    boolean found = false;
    Pupil selectedP = null;

    Elements pupilSelectors =
        doc.getElementsByAttributeValue("id", "ctl00_topMenu_pupil_drdPupils");
    for (Element pupilSelector : pupilSelectors) {
      for (Element pupil : pupilSelector.getAllElements()) {
        if (!pupil.tagName().equals("option")) {
          continue;
        }

        found = true;
        String value = pupil.attr("value");

        Pupil p = Pupil.getByFormId(value);
        if (p == null) {
          p = new Pupil(pupil.text(), value);
          long rowId = p.insert();

          if (BuildConfig.DEBUG) {
            Log.d("GshisHTMLParser", TS.get() + " Pupil.insert() = " + rowId);
          }
        }

        if (pupil.hasAttr("selected") && pupil.attr("selected").equals("selected")) {
          selectedP = p;
        }
      }
    }

    if (!found) {

      if (BuildConfig.DEBUG) {
        Log.d("GshisParser", TS.get() + " Alternative fields found!");
      }

      Element userName = doc.getElementsByClass("user-name").first();
      Element userId = doc.getElementsByAttributeValue("id", "ctl00_topMenu_tbUserId").first();

      // first() yields null when nothing matched; report that as a parse failure like the
      // other "nothing found" case instead of failing with a NullPointerException.
      if (userName == null || userId == null) {
        throw new ParseException("Pupils not found", 0);
      }

      String name = userName.text();
      String id = userId.attr("value");

      if (BuildConfig.DEBUG) {
        Log.d("GshisParser", TS.get() + " name=" + name + " id=" + id);
      }

      Pupil p = Pupil.getByFormId(id);
      if (p == null) {
        p = new Pupil(name, id);
        long rowId = p.insert();

        if (BuildConfig.DEBUG) {
          Log.d("GshisParser", TS.get() + " Pupil.insert() = " + rowId);
        }
      }

      selectedP = p;
    }

    if (selectedP == null) {
      throw new ParseException("Pupils not found", 0);
    }

    return selectedP;
  }
Ejemplo n.º 12
0
  /**
   * Resolves each given link to a second-level link, collects the results in
   * {@code stationLinks2} and writes that list to {@code links2FileName}.
   *
   * <p>Links ending in {@code .asx} or {@code .swf} are taken over unchanged. Every other link is
   * loaded through {@code parseUrl}; if the page has an {@code embed} element with a
   * {@code src}, the absolute source of the first one is collected, otherwise the link itself.
   * Links for which {@code parseUrl} returns {@code null} are skipped.
   *
   * @param theLinks the links to resolve
   * @throws IOException declared by this method; presumably raised while loading pages or
   *     writing the file
   */
  public void getSecondLinks(ArrayList<String> theLinks) throws IOException {
    for (String link : theLinks) {
      if (link.endsWith(".asx") || link.endsWith(".swf")) {
        stationLinks2.add(link);
        print("Written to file: %s", link);
        continue;
      }

      Document page = parseUrl(link, 0);
      if (page == null) {
        continue;
      }

      Elements sources = page.select("[src]");
      print("Fetching %s -->  ", link);

      boolean embedFound = false;
      for (Element source : sources) {
        if (source.tagName().equals("embed")) {
          embedFound = true;
          stationLinks2.add(source.attr("abs:src"));
          break; // link found, load next url
        }
      }

      if (!embedFound) {
        // the page has no embed tag, so keep the link that was given
        stationLinks2.add(link);
      }
    }

    writeLinksToFile(links2FileName, stationLinks2);
    print("Written %s to file, second links.", stationLinks2.size());
  }
Ejemplo n.º 13
0
 /**
  * Adds the ancestors of {@code el} to {@code parents}, nearest first, stopping at the first
  * missing parent or at a parent whose tag name is "#root" (which is not added).
  *
  * @param el element whose ancestors are collected
  * @param parents receives the ancestors
  */
 private static void accumulateParents(Element el, Elements parents) {
   Element ancestor = el.parent();
   while (ancestor != null && !ancestor.tagName().equals("#root")) {
     parents.add(ancestor);
     ancestor = ancestor.parent();
   }
 }
Ejemplo n.º 14
0
 /**
  * Walks an element depth-first and emits its text, framed according to the action that
  * {@code classifyElement} assigns to the element.
  *
  * <p>Whitespace and Sentence elements get a leading space; afterwards Whitespace elements get a
  * trailing space and Sentence elements a period. Text directly inside a Banned element is
  * dropped, while its child elements are still visited.
  *
  * @param element the element to process
  */
 private void recurse(Element element) {
   final ElementAction action = classifyElement(element);
   final boolean whitespace = action == ElementAction.Whitespace;
   final boolean sentence = action == ElementAction.Sentence;
   final boolean textAllowed = action != ElementAction.Banned;

   if (whitespace || sentence) {
     appendSpace();
   }

   // n.b., cdata not possible if we are coming from TagSoup. If we also handle
   // real xhtml by directly parsing it, then we have another story on our hands.
   // though we could use canonical XML to get rid of them.
   for (Node child : element.childNodes()) {
     if (textAllowed && child instanceof TextNode) {
       TextNode textNode = (TextNode) child;
       append(textNode, textNode.text());
     } else if (child instanceof Element) {
       recurse((Element) child);
     }
   }

   if (whitespace) {
     appendSpace();
   } else if (sentence) {
     appendPeriod();
   } else if (action == ElementAction.Mark) {
     // NOTE(review): the Mark built here is not stored or handed on within this method;
     // confirm whether it is meant to be registered somewhere.
     Mark mark = new Mark();
     mark.setOffset(pcDataOffset);
     mark.setTag(element.tagName());
   }
 }
Ejemplo n.º 15
0
    /**
     * Handles a source node by copying it into the current destination when the whitelist
     * permits it, and counting it as discarded otherwise.
     *
     * <p>A safe element is recreated through {@code createSafeElement}, appended, and becomes
     * the new destination. An unsafe element is counted unless it is the root. Text nodes are
     * always copied. Data nodes are copied when the parent's node name is a safe tag. Every
     * other node is counted as discarded.
     *
     * @param source the node being visited
     * @param depth not used by this method
     */
    public void head(Node source, int depth) {
      if (source instanceof Element) {
        Element element = (Element) source;

        if (!whitelist.isSafeTag(element.tagName())) {
          // not a safe tag, so don't add. don't count root against discarded.
          if (source != root) {
            numDiscarded++;
          }
          return;
        }

        // safe, clone and copy safe attrs
        ElementMeta safeCopy = createSafeElement(element);
        Element copied = safeCopy.el;
        destination.appendChild(copied);
        numDiscarded += safeCopy.numAttribsDiscarded;
        destination = copied;
        return;
      }

      if (source instanceof TextNode) {
        String wholeText = ((TextNode) source).getWholeText();
        destination.appendChild(new TextNode(wholeText, source.baseUri()));
        return;
      }

      if (source instanceof DataNode && whitelist.isSafeTag(source.parent().nodeName())) {
        String wholeData = ((DataNode) source).getWholeData();
        destination.appendChild(new DataNode(wholeData, source.baseUri()));
        return;
      }

      // we don't care about comments, xml proc instructions, etc
      numDiscarded++;
    }
Ejemplo n.º 16
0
 /**
  * Tells whether a node is an element that is either a block element or a {@code br}.
  *
  * @param n node to inspect; may be {@code null}
  * @return {@code true} for block elements and {@code br}, {@code false} for anything else
  */
 private boolean isBlock(Node n) {
   if (!(n instanceof Element)) {
     return false;
   }
   Element candidate = (Element) n;
   return candidate.isBlock() || candidate.tagName().equals("br");
 }
Ejemplo n.º 17
0
  /**
   * Reads the topic id and title from the first link matching {@code .item_title > a}.
   *
   * <p>The id comes from the link's {@code href} via {@code Topic.getIdFromUrl}; the title is
   * the link's inner HTML. The lookup uses {@code get(0)}, so a missing link fails there.
   *
   * @param topicBuilder builder that receives id and title
   * @param ele element searched for the title link
   */
  private static void parseTitle(Topic.Builder topicBuilder, Element ele) {
    final Element titleLink = ele.select(".item_title > a").get(0);
    Preconditions.checkState(titleLink.tagName().equals("a"));

    final String href = titleLink.attr("href");
    topicBuilder.setId(Topic.getIdFromUrl(href));
    topicBuilder.setTitle(titleLink.html());
  }
Ejemplo n.º 18
0
 /** Checks that a tag cut off inside a quoted attribute value still yields the element. */
 @Test
 public void parsesUnterminatedAttribute() {
   Document parsed = Jsoup.parse("<p id=\"foo");
   Element paragraph = parsed.getElementById("foo");

   assertNotNull(paragraph);
   assertEquals("p", paragraph.tagName());
 }
Ejemplo n.º 19
0
  /**
   * Builds the topic's member from an element whose first child is the profile link, which in
   * turn has the avatar image as its first child.
   *
   * <p>The username is derived from the link's {@code href} via {@code Member.getNameFromUrl};
   * the avatar url is the image's {@code src}. Both tag names are verified with
   * {@code Preconditions.checkState}.
   *
   * @param builder topic builder that receives the member
   * @param ele element wrapping the member link
   */
  static void parseMember(Topic.Builder builder, Element ele) {
    final Member.Builder member = new Member.Builder();

    // get member url
    final Element profileLink = ele.child(0);
    Preconditions.checkState(profileLink.tagName().equals("a"));
    member.setUsername(Member.getNameFromUrl(profileLink.attr("href")));

    // get member avatar
    final Avatar.Builder avatar = new Avatar.Builder();
    final Element avatarImage = profileLink.child(0);
    Preconditions.checkState(avatarImage.tagName().equals("img"));
    avatar.setUrl(avatarImage.attr("src"));
    member.setAvatar(avatar.createAvatar());

    builder.setMember(member.createMember());
  }
Ejemplo n.º 20
0
  /**
   * Registers the weeks offered on the page with the schedule and returns the selected one.
   *
   * <p>Every {@code option} below an element with id {@code ctl00_body_week_drdWeeks} is looked
   * up in the schedule by its {@code value}. Unknown weeks are created: the start is parsed from
   * the option text in front of the "-" (pattern {@code dd.MM}) combined with a year taken from
   * the schedule's form text, using the part before its "-" when the month is greater than 7 and
   * the part after it otherwise. The option whose {@code selected} attribute equals "selected"
   * is marked as loaded, updated and returned.
   *
   * @param doc the parsed page
   * @param s the schedule the weeks belong to
   * @return the selected week, or {@code null} when options exist but none is selected
   * @throws ParseException if no option is found or a week start cannot be parsed
   */
  public static Week getSelectedWeek(Document doc, Schedule s) throws ParseException {

    SimpleDateFormat startFormat = new SimpleDateFormat("yyyy dd.MM", Locale.ENGLISH);
    startFormat.setTimeZone(TimeZone.getTimeZone("Europe/Moscow"));

    boolean anyOption = false;
    Week chosen = null;

    for (Element selector : doc.getElementsByAttributeValue("id", "ctl00_body_week_drdWeeks")) {
      for (Element option : selector.getAllElements()) {
        if (!option.tagName().equals("option")) {
          continue;
        }

        anyOption = true;
        String label = option.text();
        String formId = option.attr("value");

        Week current = s.getWeek(formId);
        if (current == null) {
          current = new Week();

          String begin = label.substring(0, label.indexOf("-") - 1);
          String month = begin.substring(begin.indexOf(".") + 1);

          // Months after July take the year in front of the dash, the others the one after it.
          boolean useLeadingYear = Integer.parseInt(month) > 7;
          String formText = s.getFormText();
          int dash = formText.indexOf("-");
          String year = useLeadingYear ? formText.substring(0, dash - 1) : formText.substring(dash + 2);

          current.setStart(startFormat.parse(year + " " + begin));
          current.setFormText(label);
          current.setFormId(formId);

          s.addWeek(current);
        }

        if (option.hasAttr("selected") && option.attr("selected").equals("selected")) {
          chosen = current;
          long updated = current.setLoaded().update();

          if (BuildConfig.DEBUG) {
            Log.d("GshisHTMLParser", TS.get() + " Week.update() = " + updated);
          }
        }
      }
    }

    if (!anyOption) {
      throw new ParseException("Weeks not found", 0);
    }

    return chosen;
  }
Ejemplo n.º 21
0
 /**
  * Recursively replaces every {@code br} below the element: "\n" is inserted in front of it
  * through {@code Element.before} and the {@code br} itself is removed.
  *
  * @param el element whose descendants are processed
  */
 private void fixLineBreaks(Element el) {
   for (final Element child : el.children()) {
     if (!child.tagName().equals("br")) {
       fixLineBreaks(child);
       continue;
     }
     child.before("\n");
     child.remove();
   }
 }
Ejemplo n.º 22
0
  /**
   * Complement of HtmlTemplateCompiler#lexicalClimb(). Leaves the lexical state that was entered
   * for an element once the element has been processed.
   *
   * @param pc compiling context holding the current form and the stack of lexical scopes
   * @param element the element being left; a {@code form} element clears {@code pc.form}
   * @param shouldPopScope whether the topmost lexical scope is popped
   */
  private void lexicalDescend(PageCompilingContext pc, Element element, boolean shouldPopScope) {
    final boolean leavingForm = "form".equals(element.tagName());
    if (leavingForm) {
      pc.form = null;
    }

    if (!shouldPopScope) {
      return;
    }
    pc.lexicalScopes.pop();
  }
 /**
  * Determines whether an element is not focusable using keys.
  *
  * <p>Only {@code a}, {@code button}, {@code input}, {@code select} and {@code textarea} are
  * considered; such an element is reported as not focusable when its tabindex attribute holds an
  * integer of -1 or below. Surrounding whitespace in the value is ignored, and a value that is
  * not an integer at all is treated like an absent attribute.
  *
  * <p>TODO(jharty): We need to determine which elements are generally focusable (where focus is
  * disabled by setting tabindex to a negative number) and which ones generally are NOT focusable,
  * but may become focusable by setting a positive tabindex. TODO(jharty): This could move to
  * OnClickIsFocusable?
  *
  * @param element the element to test
  * @return true when the element is not focusable, else false.
  */
 static boolean notFocusable(Element element) {
   List<String> focusable = Arrays.asList("a", "button", "input", "select", "textarea");
   if (!focusable.contains(element.tagName()) || !element.hasAttr(TAB_INDEX)) {
     return false;
   }
   try {
     return Integer.parseInt(element.attr(TAB_INDEX).trim()) <= -1;
   } catch (NumberFormatException ignored) {
     // Markup in the wild carries values such as tabindex="" or tabindex="none"; those cannot
     // disable focus, so they must not abort the check either.
     return false;
   }
 }
Ejemplo n.º 24
0
  /** Checks that a {@code >} inside a quoted attribute value does not end the tag early. */
  @Test
  public void parsesRoughAttributes() {
    Document parsed =
        Jsoup.parse(
            "<html><head><title>First!</title></head>"
                + "<body><p class=\"foo > bar\">First post! <img src=\"foo.png\" /></p></body></html>");

    // need a better way to verify these:
    Element paragraph = parsed.child(1).child(0);
    assertEquals("p", paragraph.tagName());
    assertEquals("foo > bar", paragraph.attr("class"));
  }
Ejemplo n.º 25
0
 /**
  * Prints a report of the collected elements and of the top-menu links to standard output.
  *
  * <p>Runs {@code init()} first if that has not happened yet. For each entry of {@code map},
  * {@code ul} and {@code table} elements are reported as a list of their links, a single
  * {@code a} as one link, and anything else with its tag and inner HTML (a {@code span}
  * additionally with the tag of its parent). Afterwards the links below every entry of
  * {@code topMenumap} are printed.
  *
  * @return the {@code map} of collected elements
  */
 public Map doProcess() {
   if (!isInit) {
     init();
   }

   for (String key : map.keySet()) {
     Element val = map.get(key);
     String tag = val.tagName().toLowerCase();

     // ul and table elements are rendered as a list of their links
     if ("ul".equals(tag) || "table".equals(tag)) {
       System.out.println("------------------列表开始-----------------------------");
       for (Element link : val.select("a")) {
         System.out.println("a:" + link.attr("abs:href") + ",文本:" + link.text());
       }
       System.out.println("------------------列表结束-----------------------------");
       continue;
     }

     System.out.println("------------------非列表-----------------------------");
     if ("a".equals(tag)) {
       System.out.println("a:" + val.attr("abs:href") + ",文本:" + val.text());
       continue;
     }
     if ("span".equals(tag)) {
       System.out.println("父容器:" + val.parent().tagName());
     }
     System.out.println("标签:" + val.tagName() + ",html:" + val.html());
   }

   System.out.println("---------------top menu---------------------");
   for (String menuKey : topMenumap.keySet()) {
     Element menu = topMenumap.get(menuKey);
     for (Element link : menu.select("a")) {
       System.out.println("a:" + link.attr("abs:href") + ",文本:" + link.text());
     }
   }
   return map;
 }
Ejemplo n.º 26
0
 /**
  * Walks the tree below an element and files elements into {@code map} or {@code topMenumap},
  * using the key {@code level + "," + hashCode}.
  *
  * <p>The element's inner HTML is first replaced by the result of {@code deleteComent}
  * (presumably this strips HTML comments; verify against that helper). Then:
  *
  * <ul>
  *   <li>An element with children that is a {@code ul} or {@code table}: if its HTML with spaces
  *       removed contains "首页" ("home page") or its id contains "nav", every link inside it is
  *       stored in {@code topMenumap} unless that key already exists; otherwise the element
  *       itself is stored in {@code map}.
  *   <li>Any other element with children: each child is processed recursively.
  *   <li>An element without children: {@code script} elements are ignored, and an element with
  *       non-empty inner HTML is stored in {@code map}.
  * </ul>
  *
  * @param parentElement the element to process
  * @param level depth of the caller; entries are stored with this value plus one
  */
 private void getChildElement(Element parentElement, Integer level) {
   parentElement.html(deleteComent(parentElement.html()));
   if (parentElement.children().size() > 0) {
     level += 1;
     // NOTE(review): the ul/table test below does not depend on i, so for such an element the
     // same branch runs once per child; confirm that this repetition is intended.
     for (int i = 0; i < parentElement.children().size(); i++) {
       if (("ul".equals(parentElement.tagName().toLowerCase()))
           || ("table".equals(parentElement.tagName().toLowerCase()))) { // handled as one unit
         // Removes spaces; the original note says "Chinese and English spaces".
         // NOTE(review): both patterns read as a plain space here, the second was presumably a
         // full-width space; confirm against the original file encoding.
         String html = parentElement.html().replaceAll(" ", "").replaceAll(" ", "");
         if (html.contains("首页") || parentElement.id().contains("nav")) {
           // Navigation block: collect each of its links once.
           Elements links = parentElement.select("a");
           for (Element ele : links) {
             if (topMenumap.get(level + "," + ele.hashCode()) == null) {
               topMenumap.put(level + "," + ele.hashCode(), ele);
             }
           }
         } else {
           map.put(level + "," + parentElement.hashCode(), parentElement);
         }
       } else {
         getChildElement(parentElement.child(i), level);
       }
     }
   } else {
     // Leaf element: scripts are never recorded.
     if ("script".equals(parentElement.tagName().toLowerCase())) {
       return;
     }
     if (StringUtils.isNotEmpty(parentElement.html())) {
       level += 1;
       map.put(level + "," + parentElement.hashCode(), parentElement);
     }
   }
 }
Ejemplo n.º 27
0
 /**
  * Renders an element tree as indented text, one element per line in the form
  * {@code tag:ownText}. Children are indented by one more space, and a line break is appended
  * after each rendered child in addition to the one ending its own line.
  *
  * @param root element to render
  * @param indentation number of spaces in front of the element's line
  * @return the rendered tree
  */
 public static String printNode(Element root, int indentation) {
   StringBuilder out = new StringBuilder();

   int written = 0;
   while (written < indentation) {
     out.append(' ');
     written++;
   }

   out.append(root.tagName()).append(":").append(root.ownText()).append("\n");

   for (Element child : root.children()) {
     out.append(printNode(child, indentation + 1)).append("\n");
   }
   return out.toString();
 }
 /**
  * Appends the text found below an element to the accumulator, leaving out every child node
  * for which {@code unlikely} returns true.
  *
  * <p>A single space is inserted before descending into a block element when the accumulator
  * is non-empty and does not already end in whitespace, and otherwise before a {@code br}.
  *
  * @param e element whose text is collected
  * @param accum receives the text
  */
 void appendTextSkipHidden(Element e, StringBuilder accum) {
   for (Node child : e.childNodes()) {
     if (unlikely(child)) {
       continue;
     }

     if (child instanceof TextNode) {
       accum.append(((TextNode) child).text());
       continue;
     }

     if (!(child instanceof Element)) {
       continue;
     }

     Element nested = (Element) child;
     boolean blockNeedsSeparator =
         accum.length() > 0 && nested.isBlock() && !lastCharIsWhitespace(accum);
     if (blockNeedsSeparator || nested.tagName().equals("br")) {
       accum.append(" ");
     }
     appendTextSkipHidden(nested, accum);
   }
 }
Ejemplo n.º 29
0
  /**
   * Creates a copy of an element that carries only the attributes the whitelist accepts, plus
   * the attributes the whitelist enforces for the tag. Child nodes are not copied.
   *
   * @param sourceEl the element to copy
   * @return the new element together with the number of attributes that were dropped
   */
  private ElementMeta createSafeElement(Element sourceEl) {
    final String tagName = sourceEl.tagName();
    final Attributes kept = new Attributes();
    final Element copy = new Element(Tag.valueOf(tagName), sourceEl.baseUri(), kept);

    int dropped = 0;
    for (Attribute candidate : sourceEl.attributes()) {
      if (!whitelist.isSafeAttribute(tagName, sourceEl, candidate)) {
        dropped++;
        continue;
      }
      kept.put(candidate);
    }

    // Enforced attributes go in last.
    kept.addAll(whitelist.getEnforcedAttributes(tagName));

    return new ElementMeta(copy, dropped);
  }
Ejemplo n.º 30
0
  /**
   * Registers the semesters offered on the page with the schedule and returns the selected one.
   *
   * <p>Every {@code option} below an element with id {@code ctl00_body_drdTerms} is looked up in
   * the schedule by its {@code value}. Unknown semesters are created; start and stop are parsed
   * with pattern {@code dd.MM.yyyy} from the option text, the start from offset 12 up to one
   * character before the "-", the stop from two characters after the "-" up to two characters
   * before the end. The option whose {@code selected} attribute equals "selected" is marked as
   * loaded, updated and returned.
   *
   * @param doc the parsed page
   * @param sch the schedule the semesters belong to
   * @return the selected semester, or {@code null} when options exist but none is selected
   * @throws ParseException if no option is found or a date cannot be parsed
   */
  public static GradeSemester getActiveGradeSemester(Document doc, Schedule sch)
      throws ParseException {

    SimpleDateFormat dateFormat = new SimpleDateFormat("dd.MM.yyyy", Locale.ENGLISH);
    dateFormat.setTimeZone(TimeZone.getTimeZone("Europe/Moscow"));

    boolean anyOption = false;
    GradeSemester chosen = null;

    for (Element selector : doc.getElementsByAttributeValue("id", "ctl00_body_drdTerms")) {
      for (Element option : selector.getAllElements()) {
        if (!option.tagName().equals("option")) {
          continue;
        }

        anyOption = true;
        String label = option.text();
        String formId = option.attr("value");

        GradeSemester current = sch.getSemester(formId);
        if (current == null) {
          current = new GradeSemester();

          int dash = label.indexOf("-");
          current.setStart(dateFormat.parse(label.substring(12, dash - 1)));
          current.setStop(dateFormat.parse(label.substring(dash + 2, label.length() - 2)));
          current.setFormText(label);
          current.setFormId(formId);

          sch.addSemester(current);
        }

        if (option.hasAttr("selected") && option.attr("selected").equals("selected")) {
          long updated = current.setLoaded().update();
          chosen = current;

          if (BuildConfig.DEBUG) {
            Log.d("GshisHTMLParser", TS.get() + " Semester.update() = " + updated);
          }
        }
      }
    }

    if (!anyOption) {
      throw new ParseException("Semesters not found", 0);
    }

    return chosen;
  }