/** * Reads a header/line according to the configuration. After calling the readLine method * additional information is available from public fields on the reader. * * @param in <code>InputStream</code> with characters * @return result wrapped in a <code>HeaderLine</code> object * @throws IOException if an i/o error occurs in the underlying input stream */ public HeaderLine readLine(PushbackInputStream in) throws IOException { HeaderLine headerLine = new HeaderLine(); int state; if (!bNameValue) { state = S_LINE; } else { state = S_NAME; } lineSb.setLength(0); nvSb.setLength(0); bytesOut = new ByteArrayOutputStreamWithUnread(); bfErrors = 0; int c; bCr = false; boolean bLoop = true; while (bLoop) { c = in.read(); if (c != -1) { bytesOut.write(c); } switch (state) { case S_LINE: switch (c) { case -1: // EOF. bfErrors |= E_BIT_EOF; headerLine.type = HeaderLine.HLT_LINE; headerLine.line = lineSb.toString(); lineSb.setLength(0); bLoop = false; break; case '\r': bCr = true; break; case '\n': headerLine.type = HeaderLine.HLT_LINE; headerLine.line = lineSb.toString(); lineSb.setLength(0); // Check EOL. check_eol(); bLoop = false; break; default: if (bCr) { // Misplaced CR. bfErrors |= E_BIT_MISPLACED_CR; bCr = false; } // Decode character. c = decode(c, in); if (c == -1) { // EOF. bfErrors |= E_BIT_EOF; headerLine.type = HeaderLine.HLT_LINE; headerLine.line = lineSb.toString(); lineSb.setLength(0); bLoop = false; } else { if (bValidChar && encoding != ENC_RAW) { if (c < 256 && ((charCharacteristicsTab[c] & CC_CONTROL) == CC_CONTROL)) { bValidChar = false; // Invalid control char bfErrors |= E_BIT_INVALID_CONTROL_CHAR; } } if (bValidChar) { lineSb.append((char) c); } } break; } break; case S_NAME: switch (c) { case -1: // EOF. bfErrors |= E_BIT_EOF; headerLine.type = HeaderLine.HLT_LINE; headerLine.line = lineSb.toString(); lineSb.setLength(0); nvSb.setLength(0); bLoop = false; break; case '\r': bCr = true; break; case '\n': headerLine.type = HeaderLine.HLT_LINE; headerLine.line = lineSb.toString(); lineSb.setLength(0); nvSb.setLength(0); // Check EOL. check_eol(); bLoop = false; break; case ':': headerLine.type = HeaderLine.HLT_HEADERLINE; headerLine.name = nvSb.toString(); lineSb.setLength(0); nvSb.setLength(0); if (bCr) { // Misplaced CR. bfErrors |= E_BIT_MISPLACED_CR; bCr = false; } state = S_VALUE; break; default: if (bCr) { // Misplaced CR. bfErrors |= E_BIT_MISPLACED_CR; bCr = false; } // Decode character. c = decode(c, in); if (c == -1) { // EOF. bfErrors |= E_BIT_EOF; headerLine.type = HeaderLine.HLT_LINE; headerLine.line = lineSb.toString(); lineSb.setLength(0); nvSb.setLength(0); bLoop = false; } else { if (bValidChar && encoding != ENC_RAW) { if (c < 256 && ((charCharacteristicsTab[c] & CC_CONTROL) == CC_CONTROL)) { bValidChar = false; // Invalid control char bfErrors |= E_BIT_INVALID_CONTROL_CHAR; } } if (bValidChar) { lineSb.append((char) c); if (c < 256 && ((charCharacteristicsTab[c] & CC_SEPARATOR_WS) == CC_SEPARATOR_WS)) { bValidChar = false; // Invalid separator in name bfErrors |= E_BIT_INVALID_SEPARATOR_CHAR; } } if (bValidChar) { nvSb.append((char) c); } } break; } break; case S_VALUE: switch (c) { case -1: // EOF. bfErrors |= E_BIT_EOF; headerLine.value = trim(nvSb); nvSb.setLength(0); bLoop = false; break; case '\r': bCr = true; break; case '\n': // Check EOL. check_eol(); if (bLWS) { state = S_LWS; } else { headerLine.value = trim(nvSb); nvSb.setLength(0); bLoop = false; } break; default: if (bCr) { // Misplaced CR. bfErrors |= E_BIT_MISPLACED_CR; bCr = false; } // Decode character. c = decode(c, in); if (c == -1) { // EOF. bfErrors |= E_BIT_EOF; headerLine.value = trim(nvSb); nvSb.setLength(0); bLoop = false; } else { if (bValidChar && encoding != ENC_RAW) { if (c < 256 && ((charCharacteristicsTab[c] & CC_CONTROL) == CC_CONTROL)) { bValidChar = false; // Invalid control char bfErrors |= E_BIT_INVALID_CONTROL_CHAR; } } if (bValidChar) { switch (c) { case '\"': nvSb.append((char) c); if (bQuotedText) { state = S_QUOTED_TEXT; } break; case '=': if (bEncodedWords) { state = S_ENCODED_WORD_EQ; } else { nvSb.append((char) c); } break; default: nvSb.append((char) c); break; } } } break; } break; case S_LWS: switch (c) { case -1: // EOF. // bfErrors |= E_BIT_EOF; headerLine.value = trim(nvSb); nvSb.setLength(0); bLoop = false; break; case ' ': case '\t': nvSb.append(" "); state = S_VALUE; break; default: in.unread(c); bytesOut.unread(c); headerLine.value = trim(nvSb); nvSb.setLength(0); bLoop = false; break; } break; case S_QUOTED_TEXT: switch (c) { case -1: // EOF. bfErrors |= E_BIT_MISSING_QUOTE | E_BIT_EOF; headerLine.value = trim(nvSb); nvSb.setLength(0); bLoop = false; break; case '\"': if (bCr) { // Misplaced CR. bfErrors |= E_BIT_MISPLACED_CR; bCr = false; } nvSb.append((char) c); state = S_VALUE; break; case '\\': if (bCr) { // Misplaced CR. bfErrors |= E_BIT_MISPLACED_CR; bCr = false; } state = S_QUOTED_PAIR; break; case '\r': bCr = true; break; case '\n': // Check EOL. check_eol(); if (bLWS) { state = S_QUOTED_LWS; } else { headerLine.value = trim(nvSb); nvSb.setLength(0); bLoop = false; } break; default: if (bCr) { // Misplaced CR. bfErrors |= E_BIT_MISPLACED_CR; bCr = false; } // Decode character. c = decode(c, in); if (c == -1) { // EOF. bfErrors |= E_BIT_MISSING_QUOTE | E_BIT_EOF; headerLine.value = trim(nvSb); nvSb.setLength(0); bLoop = false; } else { if (bValidChar && encoding != ENC_RAW) { if (c < 256 && ((charCharacteristicsTab[c] & CC_CONTROL) == CC_CONTROL)) { bValidChar = false; // Invalid control char bfErrors |= E_BIT_INVALID_CONTROL_CHAR; } } if (bValidChar) { nvSb.append((char) c); } } break; } break; case S_QUOTED_PAIR: switch (c) { case -1: nvSb.append('\\'); // EOF. bfErrors |= E_BIT_MISSING_QUOTED_PAIR_CHAR | E_BIT_MISSING_QUOTE | E_BIT_EOF; headerLine.value = trim(nvSb); nvSb.setLength(0); bLoop = false; break; default: // Decode character. c = decode(c, in); if (c == -1) { // EOF. bfErrors |= E_BIT_MISSING_QUOTED_PAIR_CHAR | E_BIT_MISSING_QUOTE | E_BIT_EOF; headerLine.value = trim(nvSb); nvSb.setLength(0); bLoop = false; } else { nvSb.append('\\'); nvSb.append((char) c); if (!bValidChar) { bfErrors |= E_BIT_INVALID_QUOTED_PAIR_CHAR; } state = S_QUOTED_TEXT; } break; } break; case S_QUOTED_LWS: switch (c) { case -1: // EOF. bfErrors |= E_BIT_MISSING_QUOTE; headerLine.value = trim(nvSb); nvSb.setLength(0); bLoop = false; break; case ' ': case '\t': nvSb.append(" "); state = S_QUOTED_TEXT; break; default: in.unread(c); bytesOut.unread(c); bfErrors |= E_BIT_MISSING_QUOTE; headerLine.value = trim(nvSb); nvSb.setLength(0); bLoop = false; break; } break; case S_ENCODED_WORD_EQ: switch (c) { case -1: nvSb.append('='); // EOF. bfErrors |= E_BIT_EOF; headerLine.value = trim(nvSb); nvSb.setLength(0); bLoop = false; break; case '?': // Unread "=?", so it can be parsed as an EncodedWord which always starts with "=?" in.unread('?'); in.unread('='); bytesOut.unread('?'); bytesOut.unread('='); EncodedWords ew = EncodedWords.parseEncodedWords(in, true); /* if (!ew.bIsValid) { // TODO Decide whether to report encoded word errors or interpret as non encoded words. } */ nvSb.append("=?"); in.unread(ew.line, 2, ew.line.length - 2); bytesOut.write("=?".getBytes()); state = S_VALUE; break; default: nvSb.append('='); in.unread(c); bytesOut.unread(c); state = S_VALUE; break; } break; } } headerLine.raw = bytesOut.toByteArray(); headerLine.bfErrors = bfErrors; bEof = (headerLine.raw.length == 0); return headerLine; }