/* Check if there is a DOCTYPE at the start of the elements * list. If there is, return the appropriate version string. * If the DOCTYPE says it isn't HTML, trust it and call this * document ill-formed by returning -1. * If there is no DOCTYPE, or an unrecognized one, return 0. */ protected int checkDoctype(List elements) { JHElement firstElem = (JHElement) elements.get(0); if (firstElem instanceof JHXmlDecl && elements.size() >= 2) { firstElem = (JHElement) elements.get(1); } if (!(firstElem instanceof JHDoctype)) { return 0; // no DOCTYPE found } List dt = ((JHDoctype) firstElem).getDoctypeElements(); if (dt.size() < 3) { return 0; } try { // Is DOCTYPE case sensitive? Assume not. String str = ((String) dt.get(0)).toUpperCase(); if (!"HTML".equals(str)) { // It's not HTML return -1; } str = ((String) dt.get(1)).toUpperCase(); if (!"PUBLIC".equals(str)) { return 0; } str = stripQuotes(((String) dt.get(2)).toUpperCase()); _doctype = str; if ("-//W3C//DTD HTML 3.2 FINAL//EN".equals(str) || "-//W3C//DTD HTML 3.2//EN".equals(str)) { return HTML_3_2; } else if ("-//W3C//DTD HTML 4.0//EN".equals(str)) { return HTML_4_0_STRICT; } else if ("-//W3C//DTD HTML 4.0 TRANSITIONAL//EN".equals(str)) { return HTML_4_0_TRANSITIONAL; } else if ("-//W3C//DTD HTML 4.0 FRAMESET//EN".equals(str)) { return HTML_4_0_FRAMESET; } else if ("-//W3C//DTD HTML 4.01//EN".equals(str)) { return HTML_4_01_STRICT; } else if ("-//W3C//DTD HTML 4.01 TRANSITIONAL//EN".equals(str)) { return HTML_4_01_TRANSITIONAL; } else if ("-//W3C//DTD HTML 4.01 FRAMESET//EN".equals(str)) { return HTML_4_01_FRAMESET; } } catch (Exception e) { // Really shouldn't happen, but if it does we've got // a bad doctype return 0; } return 0; }
/* See if this document, even if it lacks a doctype, is most likely * XHTML. The test is that the document starts with an XML declaration * and has "html" for its first tag. * * Returns: * 0 if there's no XML declaration * 1 if there's an XML declaration but no html tag; in this * case it's probably some other kind of XML * 2 if there's an XML declaration and an html tag * */ protected int seemsToBeXHTML(List elements) { JHElement elem; try { elem = (JHElement) elements.get(0); if (!(elem instanceof JHXmlDecl)) { return 0; } Iterator iter = elements.iterator(); while (iter.hasNext()) { elem = (JHElement) iter.next(); if (elem instanceof JHOpenTag) { JHOpenTag tag = (JHOpenTag) elem; return ("html".equals(tag.getName()) ? 2 : 1); } } } catch (Exception e) { return 0; // document must be really empty } return 1; }