Пример #1
0
 /* Determine the HTML version declared by a DOCTYPE at the start of
  * the elements list.  If the first element is an XML declaration and
  * a second element exists, the second element is examined instead.
  *
  * Returns:
  *    -1 if the DOCTYPE has at least three tokens and its first token
  *       is not "HTML" (compared case-insensitively); the DOCTYPE is
  *       trusted and the document is treated as ill-formed
  *    the matching HTML_* version constant if the public identifier
  *       is one of the recognized W3C HTML 3.2 / 4.0 / 4.01 identifiers
  *     0 in every other case: null or empty list, no DOCTYPE, fewer
  *       than three DOCTYPE tokens, a second token other than "PUBLIC",
  *       an unrecognized identifier, or an unexpected token type
  *
  * Side effect: when the first two tokens are "HTML" and "PUBLIC", the
  * upper-cased, quote-stripped third token is stored in _doctype.
  */
 protected int checkDoctype(List elements) {
   // An absent or empty element list cannot contain a DOCTYPE.
   if (elements == null || elements.isEmpty()) {
     return 0;
   }
   JHElement firstElem = (JHElement) elements.get(0);
   if (firstElem instanceof JHXmlDecl && elements.size() >= 2) {
     firstElem = (JHElement) elements.get(1);
   }
   if (!(firstElem instanceof JHDoctype)) {
     return 0; // no DOCTYPE found
   }
   List dt = ((JHDoctype) firstElem).getDoctypeElements();
   if (dt.size() < 3) {
     return 0;
   }
   try {
     // Is DOCTYPE case sensitive?  Assume not.  Upper-casing uses a
     // fixed locale so the comparison does not depend on the JVM's
     // default locale (e.g. Turkish maps 'i' to a dotted capital I,
     // which would break "PUBLIC" and "TRANSITIONAL").
     String str = ((String) dt.get(0)).toUpperCase(java.util.Locale.ENGLISH);
     if (!"HTML".equals(str)) {
       // It's not HTML
       return -1;
     }
     str = ((String) dt.get(1)).toUpperCase(java.util.Locale.ENGLISH);
     if (!"PUBLIC".equals(str)) {
       return 0;
     }
     str = stripQuotes(((String) dt.get(2)).toUpperCase(java.util.Locale.ENGLISH));
     _doctype = str;
     if ("-//W3C//DTD HTML 3.2 FINAL//EN".equals(str) || "-//W3C//DTD HTML 3.2//EN".equals(str)) {
       return HTML_3_2;
     } else if ("-//W3C//DTD HTML 4.0//EN".equals(str)) {
       return HTML_4_0_STRICT;
     } else if ("-//W3C//DTD HTML 4.0 TRANSITIONAL//EN".equals(str)) {
       return HTML_4_0_TRANSITIONAL;
     } else if ("-//W3C//DTD HTML 4.0 FRAMESET//EN".equals(str)) {
       return HTML_4_0_FRAMESET;
     } else if ("-//W3C//DTD HTML 4.01//EN".equals(str)) {
       return HTML_4_01_STRICT;
     } else if ("-//W3C//DTD HTML 4.01 TRANSITIONAL//EN".equals(str)) {
       return HTML_4_01_TRANSITIONAL;
     } else if ("-//W3C//DTD HTML 4.01 FRAMESET//EN".equals(str)) {
       return HTML_4_01_FRAMESET;
     }
   } catch (Exception e) {
     // Really shouldn't happen (e.g. a token that is null or not a
     // String), but if it does we've got a bad doctype
     return 0;
   }
   // Well-formed PUBLIC DOCTYPE, but not an identifier we recognize.
   return 0;
 }
Пример #2
0
 /*  Guess whether a document is XHTML even when it carries no doctype.
  *  The heuristic: the first element must be an XML declaration, and
  *  the first open tag found in the list must be named "html".
  *
  *  Returns:
  *     0 if the first element is not an XML declaration, or if any
  *       exception occurs while inspecting the list (for instance
  *       because the list is empty)
  *     1 if there's an XML declaration but the first open tag is not
  *       "html", or there is no open tag at all; in this case it's
  *       probably some other kind of XML
  *     2 if there's an XML declaration and the first open tag is "html"
  *
  */
 protected int seemsToBeXHTML(List elements) {
   try {
     JHElement first = (JHElement) elements.get(0);
     if (!(first instanceof JHXmlDecl)) {
       return 0;
     }
     // Only the first open tag decides the outcome.
     for (Iterator it = elements.iterator(); it.hasNext();) {
       JHElement current = (JHElement) it.next();
       if (!(current instanceof JHOpenTag)) {
         continue;
       }
       String tagName = ((JHOpenTag) current).getName();
       if ("html".equals(tagName)) {
         return 2;
       }
       return 1;
     }
     // XML declaration present, but no open tag anywhere in the list.
     return 1;
   } catch (Exception e) {
     return 0; // document must be really empty
   }
 }