// Once collected, you get only one visit from the root tag block (hopefully HTML) public void visit(TagBlock tb) { // We are not going to parse attribute information in pages that don't have an HTML tag block if (tb.startTag.tagName.equalsIgnoreCase("HTML")) { HTMLElement he; TagBlock tbe; // Let's go looking for the head tag for (Enumeration be = tb.body.elements(); be.hasMoreElements(); ) { he = (HTMLElement) be.nextElement(); if (he.getClass() == TagBlock.class) { tbe = (TagBlock) he; if (tbe.startTag.tagName.equalsIgnoreCase("HEAD")) { // Found the head tag block, now let's look for the page title and any meta tags for (Enumeration hbe = tbe.body.elements(); hbe.hasMoreElements(); ) { he = (HTMLElement) hbe.nextElement(); if (he.getClass() == TagBlock.class) { tbe = (TagBlock) he; if (tbe.startTag.tagName.equalsIgnoreCase("TITLE")) { // Should only be one item in title body, hopefully it's a text element for (Enumeration e = tbe.body.elements(); e.hasMoreElements(); ) { he = (HTMLElement) e.nextElement(); if (he.getClass() == Text.class) { pageTitle = HttpUtility.HtmlDecode(((Text) he).text).Trim(); break; } } } } else if (he.getClass() == Tag.class) { Tag tg = (Tag) he; // See if this is a META tag if (tg.tagName.equalsIgnoreCase("META")) { // Convert this tag and it's attributes into a more .NET // friendly collection MetaTag metaTag = new MetaTag(tg); // Copy attributes in Java vector into .NET ArrayList... for (Enumeration ae = tg.attributeList.attributes.elements(); ae.hasMoreElements(); ) metaTag.get_Attributes().Add((Attribute) ae.nextElement()); tagList.Add(metaTag); } } } // Once we've found the head element, we can leave... break; } } } } }
/**
 * Adds to tags every entry of tagList whose name equals Name, ignoring case.
 *
 * Entries are examined in index order. Null entries and entries without a name
 * are skipped.
 *
 * @param tags           collection that receives each matching tag via Add
 * @param Name           META tag name to look for; compared case-insensitively
 * @param stopAfterFirst when true, the scan ends as soon as one match has been added
 */
private void GetMetaTags(MetaTags tags, String Name, boolean stopAfterFirst) {
    for (int x = 0; x < tagList.get_Count(); x++) {
        MetaTag tag = get_Item(x);
        if (tag == null) {
            continue;
        }

        // META tags are not required to carry a name (e.g. http-equiv or charset tags).
        // NOTE(review): assumes get_Name() may return null for such tags - confirm against
        // MetaTag. The guard makes them a non-match instead of a null dereference.
        String tagName = tag.get_Name();
        if (tagName != null && tagName.equalsIgnoreCase(Name)) {
            tags.Add(tag);
            if (stopAfterFirst) {
                break;
            }
        }
    }
}