/** Parses the HTTP response section, determining the body offset and setting the charset field */ private void parseResponse() { // int base; boolean parsingContentType = false; bss.init(responseBase, responseOffset, responseOffset + responseLength); if (!ByteScan.SkipToWhitespace(bss)) return; if (!ByteScan.SkipWhitespace(bss)) return; ByteScan.ParseInt(bss); responseCode = bss.ival; while (!bss.eob()) { if (bss.buf[bss.offset] == '\n') { if (parsingContentType) { tmpBss.init(bss.buf, bss.mark, bss.offset); parseContentType(tmpBss); parsingContentType = false; } if (bss.offset < bss.end - 2) { if (bss.buf[bss.offset + 1] == '\n') { responseBodyOffset = bss.offset + 2; bss.offset += 2; break; } else if (bss.buf[bss.offset + 1] == '\r' && bss.buf[bss.offset + 2] == '\n') { responseBodyOffset = bss.offset + 3; bss.offset += 3; break; } } else { responseBodyOffset = bss.end; bss.offset = bss.end; break; } bss.offset++; bss.mark(); } else if (bss.buf[bss.offset] == ':') { tmpBss.init(bss.buf, bss.mark, bss.offset); if (ByteScan.Equals(tmpBss, contentTypeChars)) parsingContentType = true; bss.offset++; bss.mark(); } else bss.offset++; } responseBodyOffset = bss.offset; if (contentType == null || contentType.startsWith("text")) findAndParseContentType(bss); }
/** * Looks for <META http-equiv="Content-Type" ... or <?xml ... encoding=" and extracts content-type * and/or charset info */ private void findAndParseContentType(ByteScan.State bss) { if (!ByteScan.FindSkip(bss, '<')) return; /** XML */ if (ByteScan.StartsWithSkip(bss, xmlChars)) { // set byte scan region to that inside <?xml ... ?> bss.mark(); if (!ByteScan.Find(bss, xmlEndChars)) return; bss.end = bss.offset; bss.flip(); // find encoding if (!ByteScan.FindSkip(bss, encodingChars)) return; // find starting doublequote if (!ByteScan.FindSkip(bss, '"')) return; bss.mark(); if (!ByteScan.Find(bss, '"')) return; charset = bss.toString().toUpperCase(); return; } /** HTML */ bss.offset--; while (ByteScan.FindSkip(bss, '<')) { if (ByteScan.StartsWithSkip(bss, metaChars)) { // make sure next comes whitespace bss.mark(); if (!ByteScan.SkipWhitespace(bss)) return; if (bss.offset == bss.mark) continue; if (!ByteScan.StartsWithSkip(bss, httpEquivChars)) continue; if (!ByteScan.SkipWhitespace(bss)) return; // skip '=' character if (bss.buf[bss.offset++] != '=') continue; if (!ByteScan.SkipWhitespace(bss)) return; // skip '"' character if (bss.buf[bss.offset++] != '"') continue; bss.mark(); if (!ByteScan.FindSkip(bss, '"')) return; bss.flip(); if (!ByteScan.StartsWith(bss, contentTypeChars)) continue; bss.flip(); if (!ByteScan.SkipWhitespace(bss)) return; if (!ByteScan.StartsWithSkip(bss, contentChars)) return; if (!ByteScan.SkipWhitespace(bss)) return; // skip '=' character if (bss.buf[bss.offset++] != '=') continue; if (!ByteScan.SkipWhitespace(bss)) return; // skip '=' character if (bss.buf[bss.offset++] != '"') continue; bss.mark(); if (!ByteScan.Find(bss, '"')) return; tmpBss.init(bss.buf, bss.mark, bss.offset); parseContentType(tmpBss); if (contentType == null) contentType = "text/html"; break; } else if (ByteScan.Equals(bss, closeHeadChars)) { break; } } }