Beispiel #1
0
 /**
  * Validates, and for UTF-8 also decodes, one character in the reader's configured encoding.
  *
  * <p>As a side effect <code>bValidChar</code> is updated to tell whether the character is valid
  * in that encoding, and the matching error bit is added to <code>bfErrors</code> when it is not.
  * In UTF-8 mode <code>utf8.chars_read</code> is also written to <code>bytesOut</code>.
  *
  * @param c first character of the possibly encoded character sequence
  * @param in <code>InputStream</code> from which the UTF-8 reader may pull further characters
  * @return the decoded character; in UTF-8 mode whatever <code>utf8.readUtf8</code> returned,
  *     otherwise <code>c</code> unchanged
  * @throws IOException if an i/o error occurs in the underlying input stream
  */
 protected int decode(int c, InputStream in) throws IOException {
   if (encoding == ENC_UTF8) {
     final int decoded = utf8.readUtf8(c, in);
     bytesOut.write(utf8.chars_read);
     bValidChar = utf8.bValidChar;
     // A result of -1 is left for the caller to handle and is not flagged as invalid UTF-8.
     if (decoded != -1 && !bValidChar) {
       bfErrors |= E_BIT_INVALID_UTF8_ENCODING;
     }
     return decoded;
   }
   if (encoding == ENC_US_ASCII) {
     // US-ASCII only covers the 7-bit range.
     bValidChar = c < 128;
     if (!bValidChar) {
       bfErrors |= E_BIT_INVALID_US_ASCII_CHAR;
     }
     return c;
   }
   // ISO-8859-1, raw 8-bit and any other encoding value: every character is accepted as-is.
   bValidChar = true;
   return c;
 }
Beispiel #2
0
 /**
  * Reads a header/line according to the configuration. After calling the readLine method
  * additional information is available from public fields on the reader.
  *
  * <p>The input is consumed one byte at a time and driven through a state machine. When
  * <code>bNameValue</code> is not set the whole line is collected as-is (<code>S_LINE</code>).
  * Otherwise the text up to the first ':' becomes the name (<code>S_NAME</code>) and the rest the
  * value (<code>S_VALUE</code>), with optional handling of folded lines (<code>bLWS</code>),
  * quoted text (<code>bQuotedText</code>) and encoded words (<code>bEncodedWords</code>). A line
  * that ends before a ':' is seen is returned as a plain <code>HLT_LINE</code>.
  *
  * <p>Bytes read by this method are recorded in <code>HeaderLine.raw</code>; bytes that are
  * pushed back onto the stream are removed from it again. Problems are not raised as exceptions
  * but accumulated as <code>E_BIT_*</code> flags in <code>HeaderLine.bfErrors</code>. A value
  * that ends inside an open quoted string is reported with <code>E_BIT_MISSING_QUOTE</code> no
  * matter how the value was terminated.
  *
  * @param in <code>InputStream</code> with characters; it needs enough pushback capacity for the
  *     bytes this method unreads (one lookahead byte, or the remainder of an encoded word)
  * @return result wrapped in a <code>HeaderLine</code> object
  * @throws IOException if an i/o error occurs in the underlying input stream
  */
 public HeaderLine readLine(PushbackInputStream in) throws IOException {
   HeaderLine headerLine = new HeaderLine();
   // Start state depends on whether name/value parsing is enabled.
   int state;
   if (!bNameValue) {
     state = S_LINE;
   } else {
     state = S_NAME;
   }
   // Reset all per-line state held on the reader.
   lineSb.setLength(0);
   nvSb.setLength(0);
   bytesOut = new ByteArrayOutputStreamWithUnread();
   bfErrors = 0;
   int c;
   bCr = false;
   boolean bLoop = true;
   while (bLoop) {
     c = in.read();
     if (c != -1) {
       // Record the raw byte; EOF (-1) is not part of the raw data.
       bytesOut.write(c);
     }
     switch (state) {
       case S_LINE:
         // Plain line mode: collect characters until LF or EOF.
         switch (c) {
           case -1:
             // EOF.
             bfErrors |= E_BIT_EOF;
             headerLine.type = HeaderLine.HLT_LINE;
             headerLine.line = lineSb.toString();
             lineSb.setLength(0);
             bLoop = false;
             break;
           case '\r':
             // Remember the CR; it is only legal directly in front of an LF.
             bCr = true;
             break;
           case '\n':
             headerLine.type = HeaderLine.HLT_LINE;
             headerLine.line = lineSb.toString();
             lineSb.setLength(0);
             // Check EOL.
             check_eol();
             bLoop = false;
             break;
           default:
             if (bCr) {
               // Misplaced CR.
               bfErrors |= E_BIT_MISPLACED_CR;
               bCr = false;
             }
             // Decode character.
             c = decode(c, in);
             if (c == -1) {
               // EOF.
               bfErrors |= E_BIT_EOF;
               headerLine.type = HeaderLine.HLT_LINE;
               headerLine.line = lineSb.toString();
               lineSb.setLength(0);
               bLoop = false;
             } else {
               // Control characters are rejected in every encoding except raw.
               if (bValidChar && encoding != ENC_RAW) {
                 if (c < 256 && ((charCharacteristicsTab[c] & CC_CONTROL) == CC_CONTROL)) {
                   bValidChar = false;
                   // Invalid control char
                   bfErrors |= E_BIT_INVALID_CONTROL_CHAR;
                 }
               }
               // Invalid characters are flagged but dropped from the result.
               if (bValidChar) {
                 lineSb.append((char) c);
               }
             }
             break;
         }
         break;
       case S_NAME:
         // Name mode: collect the header name until ':'. lineSb shadows the text so that the
         // input can still be returned as a plain line if no ':' turns up.
         switch (c) {
           case -1:
             // EOF.
             bfErrors |= E_BIT_EOF;
             headerLine.type = HeaderLine.HLT_LINE;
             headerLine.line = lineSb.toString();
             lineSb.setLength(0);
             nvSb.setLength(0);
             bLoop = false;
             break;
           case '\r':
             bCr = true;
             break;
           case '\n':
             // No ':' was found, so this is a plain line and not a header line.
             headerLine.type = HeaderLine.HLT_LINE;
             headerLine.line = lineSb.toString();
             lineSb.setLength(0);
             nvSb.setLength(0);
             // Check EOL.
             check_eol();
             bLoop = false;
             break;
           case ':':
             // End of name; from here on nvSb collects the value.
             headerLine.type = HeaderLine.HLT_HEADERLINE;
             headerLine.name = nvSb.toString();
             lineSb.setLength(0);
             nvSb.setLength(0);
             if (bCr) {
               // Misplaced CR.
               bfErrors |= E_BIT_MISPLACED_CR;
               bCr = false;
             }
             state = S_VALUE;
             break;
           default:
             if (bCr) {
               // Misplaced CR.
               bfErrors |= E_BIT_MISPLACED_CR;
               bCr = false;
             }
             // Decode character.
             c = decode(c, in);
             if (c == -1) {
               // EOF.
               bfErrors |= E_BIT_EOF;
               headerLine.type = HeaderLine.HLT_LINE;
               headerLine.line = lineSb.toString();
               lineSb.setLength(0);
               nvSb.setLength(0);
               bLoop = false;
             } else {
               if (bValidChar && encoding != ENC_RAW) {
                 if (c < 256 && ((charCharacteristicsTab[c] & CC_CONTROL) == CC_CONTROL)) {
                   bValidChar = false;
                   // Invalid control char
                   bfErrors |= E_BIT_INVALID_CONTROL_CHAR;
                 }
               }
               if (bValidChar) {
                 // The character is kept in the line text even when it turns out to be a
                 // separator that is not allowed in a name.
                 lineSb.append((char) c);
                 if (c < 256
                     && ((charCharacteristicsTab[c] & CC_SEPARATOR_WS) == CC_SEPARATOR_WS)) {
                   bValidChar = false;
                   // Invalid separator in name
                   bfErrors |= E_BIT_INVALID_SEPARATOR_CHAR;
                 }
               }
               if (bValidChar) {
                 nvSb.append((char) c);
               }
             }
             break;
         }
         break;
       case S_VALUE:
         // Value mode: collect the header value until LF or EOF.
         switch (c) {
           case -1:
             // EOF.
             bfErrors |= E_BIT_EOF;
             headerLine.value = trim(nvSb);
             nvSb.setLength(0);
             bLoop = false;
             break;
           case '\r':
             bCr = true;
             break;
           case '\n':
             // Check EOL.
             check_eol();
             if (bLWS) {
               // The value may continue on the next line; look at its first byte.
               state = S_LWS;
             } else {
               headerLine.value = trim(nvSb);
               nvSb.setLength(0);
               bLoop = false;
             }
             break;
           default:
             if (bCr) {
               // Misplaced CR.
               bfErrors |= E_BIT_MISPLACED_CR;
               bCr = false;
             }
             // Decode character.
             c = decode(c, in);
             if (c == -1) {
               // EOF.
               bfErrors |= E_BIT_EOF;
               headerLine.value = trim(nvSb);
               nvSb.setLength(0);
               bLoop = false;
             } else {
               if (bValidChar && encoding != ENC_RAW) {
                 if (c < 256 && ((charCharacteristicsTab[c] & CC_CONTROL) == CC_CONTROL)) {
                   bValidChar = false;
                   // Invalid control char
                   bfErrors |= E_BIT_INVALID_CONTROL_CHAR;
                 }
               }
               if (bValidChar) {
                 switch (c) {
                   case '\"':
                     // The opening quote is part of the value either way.
                     nvSb.append((char) c);
                     if (bQuotedText) {
                       state = S_QUOTED_TEXT;
                     }
                     break;
                   case '=':
                     if (bEncodedWords) {
                       // Possible start of an encoded word ("=?"); hold the '=' back until the
                       // next byte has been seen.
                       state = S_ENCODED_WORD_EQ;
                     } else {
                       nvSb.append((char) c);
                     }
                     break;
                   default:
                     nvSb.append((char) c);
                     break;
                 }
               }
             }
             break;
         }
         break;
       case S_LWS:
         // First byte after the LF that ended a value line: a space or tab folds the next line
         // into the current value, anything else belongs to the next header/line.
         switch (c) {
           case -1:
             // EOF.
             // E_BIT_EOF is deliberately not set here (it was left disabled in the code);
             // presumably because the value line itself was already terminated by an LF.
             // bfErrors |= E_BIT_EOF;
             headerLine.value = trim(nvSb);
             nvSb.setLength(0);
             bLoop = false;
             break;
           case ' ':
           case '\t':
             // Folded line: the leading whitespace character is replaced by a single space.
             nvSb.append(" ");
             state = S_VALUE;
             break;
           default:
             // Not a continuation; hand the byte back to the stream and drop it from raw.
             in.unread(c);
             bytesOut.unread(c);
             headerLine.value = trim(nvSb);
             nvSb.setLength(0);
             bLoop = false;
             break;
         }
         break;
       case S_QUOTED_TEXT:
         // Inside a quoted string within the value.
         switch (c) {
           case -1:
             // EOF.
             bfErrors |= E_BIT_MISSING_QUOTE | E_BIT_EOF;
             headerLine.value = trim(nvSb);
             nvSb.setLength(0);
             bLoop = false;
             break;
           case '\"':
             if (bCr) {
               // Misplaced CR.
               bfErrors |= E_BIT_MISPLACED_CR;
               bCr = false;
             }
             // Closing quote; back to normal value parsing.
             nvSb.append((char) c);
             state = S_VALUE;
             break;
           case '\\':
             if (bCr) {
               // Misplaced CR.
               bfErrors |= E_BIT_MISPLACED_CR;
               bCr = false;
             }
             // Start of a quoted pair; the backslash is appended once the next char is known.
             state = S_QUOTED_PAIR;
             break;
           case '\r':
             bCr = true;
             break;
           case '\n':
             // Check EOL.
             check_eol();
             if (bLWS) {
               state = S_QUOTED_LWS;
             } else {
               // Folding is disabled, so the value ends here with the quoted string still
               // open. Report it like every other unterminated-quote exit does.
               bfErrors |= E_BIT_MISSING_QUOTE;
               headerLine.value = trim(nvSb);
               nvSb.setLength(0);
               bLoop = false;
             }
             break;
           default:
             if (bCr) {
               // Misplaced CR.
               bfErrors |= E_BIT_MISPLACED_CR;
               bCr = false;
             }
             // Decode character.
             c = decode(c, in);
             if (c == -1) {
               // EOF.
               bfErrors |= E_BIT_MISSING_QUOTE | E_BIT_EOF;
               headerLine.value = trim(nvSb);
               nvSb.setLength(0);
               bLoop = false;
             } else {
               if (bValidChar && encoding != ENC_RAW) {
                 if (c < 256 && ((charCharacteristicsTab[c] & CC_CONTROL) == CC_CONTROL)) {
                   bValidChar = false;
                   // Invalid control char
                   bfErrors |= E_BIT_INVALID_CONTROL_CHAR;
                 }
               }
               if (bValidChar) {
                 nvSb.append((char) c);
               }
             }
             break;
         }
         break;
       case S_QUOTED_PAIR:
         // Character following a backslash inside a quoted string.
         switch (c) {
           case -1:
             // Keep the dangling backslash in the value.
             nvSb.append('\\');
             // EOF.
             bfErrors |= E_BIT_MISSING_QUOTED_PAIR_CHAR | E_BIT_MISSING_QUOTE | E_BIT_EOF;
             headerLine.value = trim(nvSb);
             nvSb.setLength(0);
             bLoop = false;
             break;
           default:
             // Decode character.
             c = decode(c, in);
             if (c == -1) {
               // EOF.
               bfErrors |= E_BIT_MISSING_QUOTED_PAIR_CHAR | E_BIT_MISSING_QUOTE | E_BIT_EOF;
               headerLine.value = trim(nvSb);
               nvSb.setLength(0);
               bLoop = false;
             } else {
               // The pair is kept verbatim, even when the escaped character is invalid;
               // the problem is only recorded in the error bits.
               nvSb.append('\\');
               nvSb.append((char) c);
               if (!bValidChar) {
                 bfErrors |= E_BIT_INVALID_QUOTED_PAIR_CHAR;
               }
               state = S_QUOTED_TEXT;
             }
             break;
         }
         break;
       case S_QUOTED_LWS:
         // First byte after an LF inside a quoted string: a space or tab continues the quoted
         // text on the next line, anything else means the closing quote is missing.
         switch (c) {
           case -1:
             // EOF.
             bfErrors |= E_BIT_MISSING_QUOTE;
             headerLine.value = trim(nvSb);
             nvSb.setLength(0);
             bLoop = false;
             break;
           case ' ':
           case '\t':
             nvSb.append(" ");
             state = S_QUOTED_TEXT;
             break;
           default:
             // Not a continuation; hand the byte back to the stream and drop it from raw.
             in.unread(c);
             bytesOut.unread(c);
             bfErrors |= E_BIT_MISSING_QUOTE;
             headerLine.value = trim(nvSb);
             nvSb.setLength(0);
             bLoop = false;
             break;
         }
         break;
       case S_ENCODED_WORD_EQ:
         // An '=' was seen in the value and held back; decide whether it starts an encoded word.
         switch (c) {
           case -1:
             // Emit the '=' that was held back.
             nvSb.append('=');
             // EOF.
             bfErrors |= E_BIT_EOF;
             headerLine.value = trim(nvSb);
             nvSb.setLength(0);
             bLoop = false;
             break;
           case '?':
             //  Unread "=?", so it can be parsed as an EncodedWord which always starts with "=?"
             in.unread('?');
             in.unread('=');
             bytesOut.unread('?');
             bytesOut.unread('=');
             EncodedWords ew = EncodedWords.parseEncodedWords(in, true);
             // TODO Decide whether to report encoded word errors (ew.bIsValid) or interpret
             // them as non encoded words. For now the parse result is not used: only "=?" is
             // consumed and everything after it is pushed back for normal value parsing.
             nvSb.append("=?");
             in.unread(ew.line, 2, ew.line.length - 2);
             // Restore the two raw bytes removed above. They are written as byte values rather
             // than via String.getBytes(), which depends on the platform default charset.
             bytesOut.write('=');
             bytesOut.write('?');
             state = S_VALUE;
             break;
           default:
             // Plain '='; emit it and let the value state re-read the current byte.
             nvSb.append('=');
             in.unread(c);
             bytesOut.unread(c);
             state = S_VALUE;
             break;
         }
         break;
     }
   }
   headerLine.raw = bytesOut.toByteArray();
   headerLine.bfErrors = bfErrors;
   // No raw bytes at all means the stream was already at EOF when this call started.
   bEof = (headerLine.raw.length == 0);
   return headerLine;
 }