/** Parses the HTTP response section, determining the body offset and setting the charset field */ private void parseResponse() { // int base; boolean parsingContentType = false; bss.init(responseBase, responseOffset, responseOffset + responseLength); if (!ByteScan.SkipToWhitespace(bss)) return; if (!ByteScan.SkipWhitespace(bss)) return; ByteScan.ParseInt(bss); responseCode = bss.ival; while (!bss.eob()) { if (bss.buf[bss.offset] == '\n') { if (parsingContentType) { tmpBss.init(bss.buf, bss.mark, bss.offset); parseContentType(tmpBss); parsingContentType = false; } if (bss.offset < bss.end - 2) { if (bss.buf[bss.offset + 1] == '\n') { responseBodyOffset = bss.offset + 2; bss.offset += 2; break; } else if (bss.buf[bss.offset + 1] == '\r' && bss.buf[bss.offset + 2] == '\n') { responseBodyOffset = bss.offset + 3; bss.offset += 3; break; } } else { responseBodyOffset = bss.end; bss.offset = bss.end; break; } bss.offset++; bss.mark(); } else if (bss.buf[bss.offset] == ':') { tmpBss.init(bss.buf, bss.mark, bss.offset); if (ByteScan.Equals(tmpBss, contentTypeChars)) parsingContentType = true; bss.offset++; bss.mark(); } else bss.offset++; } responseBodyOffset = bss.offset; if (contentType == null || contentType.startsWith("text")) findAndParseContentType(bss); }
/**
 * Parses a Content-Type value (from the HTTP header, or from the {@code content} attribute of
 * an HTML META tag) and stores the results in the {@code contentType} and {@code charset}
 * fields.
 *
 * <p>The media type is the leading run of letters, digits, '/', '-', '+' and '.', lowercased;
 * it is discarded (set to {@code null}) when it contains no '/'. Any {@code charset=...}
 * parameter after the first ';' is uppercased and passed through {@code cleanCharset}.
 *
 * <p>Case conversion uses {@link java.util.Locale#ROOT} so that the result does not depend on
 * the JVM default locale (e.g. Turkish dotted/dotless 'i' mappings would otherwise corrupt
 * tokens such as "application/..." or "iso-8859-1").
 *
 * @param bss scan state positioned at the start of the value; its offset and mark are
 *     advanced as a side effect
 */
private void parseContentType(ByteScan.State bss) {
    if (!ByteScan.SkipWhitespace(bss)) {
        return;
    }
    // Tolerate a value that is wrapped in double quotes.
    if (bss.buf[bss.offset] == '"') {
        bss.offset++;
    }
    if (!ByteScan.SkipWhitespace(bss)) {
        return;
    }

    // Collect the media type token.
    bss.mark();
    while (!bss.eob()
            && (Character.isLetterOrDigit(bss.buf[bss.offset])
                || bss.buf[bss.offset] == '/'
                || bss.buf[bss.offset] == '-'
                || bss.buf[bss.offset] == '+'
                || bss.buf[bss.offset] == '.')) {
        bss.offset++;
    }
    contentType = bss.toString().toLowerCase(java.util.Locale.ROOT).trim();
    // A media type must have the form type/subtype; anything else is treated as absent.
    if (contentType.indexOf('/') == -1) {
        contentType = null;
    }

    // Parameters, if any, follow the first ';'.
    if (!ByteScan.Find(bss, ';')) {
        return;
    }
    if (bss.eob()) {
        return;
    }

    // Take everything from the ';' to the end of the region as the parameter list.
    bss.mark();
    bss.offset = bss.end;
    String parameterList = bss.toString().toLowerCase(java.util.Locale.ROOT);

    StringTokenizer parameters = new StringTokenizer(parameterList, ";");
    while (parameters.hasMoreTokens()) {
        String parameter = parameters.nextToken().trim();
        if (!parameter.startsWith("charset")) {
            continue;
        }
        // Look for '=' only after the 7-character "charset" name.
        int equalsIndex = parameter.indexOf('=', 7);
        if (equalsIndex >= 0) {
            charset = parameter.substring(equalsIndex + 1).trim().toUpperCase(java.util.Locale.ROOT);
            charset = cleanCharset(charset);
        }
    }
}
/** * Looks for <META http-equiv="Content-Type" ... or <?xml ... encoding=" and extracts content-type * and/or charset info */ private void findAndParseContentType(ByteScan.State bss) { if (!ByteScan.FindSkip(bss, '<')) return; /** XML */ if (ByteScan.StartsWithSkip(bss, xmlChars)) { // set byte scan region to that inside <?xml ... ?> bss.mark(); if (!ByteScan.Find(bss, xmlEndChars)) return; bss.end = bss.offset; bss.flip(); // find encoding if (!ByteScan.FindSkip(bss, encodingChars)) return; // find starting doublequote if (!ByteScan.FindSkip(bss, '"')) return; bss.mark(); if (!ByteScan.Find(bss, '"')) return; charset = bss.toString().toUpperCase(); return; } /** HTML */ bss.offset--; while (ByteScan.FindSkip(bss, '<')) { if (ByteScan.StartsWithSkip(bss, metaChars)) { // make sure next comes whitespace bss.mark(); if (!ByteScan.SkipWhitespace(bss)) return; if (bss.offset == bss.mark) continue; if (!ByteScan.StartsWithSkip(bss, httpEquivChars)) continue; if (!ByteScan.SkipWhitespace(bss)) return; // skip '=' character if (bss.buf[bss.offset++] != '=') continue; if (!ByteScan.SkipWhitespace(bss)) return; // skip '"' character if (bss.buf[bss.offset++] != '"') continue; bss.mark(); if (!ByteScan.FindSkip(bss, '"')) return; bss.flip(); if (!ByteScan.StartsWith(bss, contentTypeChars)) continue; bss.flip(); if (!ByteScan.SkipWhitespace(bss)) return; if (!ByteScan.StartsWithSkip(bss, contentChars)) return; if (!ByteScan.SkipWhitespace(bss)) return; // skip '=' character if (bss.buf[bss.offset++] != '=') continue; if (!ByteScan.SkipWhitespace(bss)) return; // skip '=' character if (bss.buf[bss.offset++] != '"') continue; bss.mark(); if (!ByteScan.Find(bss, '"')) return; tmpBss.init(bss.buf, bss.mark, bss.offset); parseContentType(tmpBss); if (contentType == null) contentType = "text/html"; break; } else if (ByteScan.Equals(bss, closeHeadChars)) { break; } } }