コード例 #1
0
 /**
  * Reads a hex string of the form {@code <hexdigits>}. The opening {@code <} has already been
  * consumed. Values are pulled one at a time from {@code readHexPair()} until it returns a
  * negative number, after which the next byte in the buffer must be the closing {@code >}.
  *
  * @param objNum the object number of the object containing the string being read; negative
  *     only if the object number is unavailable, which should only happen if we're reading a
  *     string placed directly in the trailer
  * @param objGen the object generation of the object containing the string being read; negative
  *     only if the objNum is unavailable
  * @param decrypter the decrypter to use
  * @return a {@code PDFObject} of type {@code PDFObject.STRING} holding the result of passing the
  *     decoded characters through {@code decrypter.decryptString}
  * @throws IOException on a parse failure; in particular a {@code PDFParseException} is thrown
  *     when the byte following the hex data is not {@code >}
  */
 private PDFObject readHexString(int objNum, int objGen, PDFDecrypter decrypter)
     throws IOException {
   // The accumulator never escapes this method, so the unsynchronized StringBuilder is
   // sufficient; each decoded value is stored as a single char.
   StringBuilder sb = new StringBuilder();
   int val;
   while ((val = readHexPair()) >= 0) {
     sb.append((char) val);
   }
   // readHexPair() gave up; the only acceptable byte at this point is the terminator
   if (this.buf.get() != '>') {
     throw new PDFParseException("Bad character in Hex String");
   }
   return new PDFObject(
       this, PDFObject.STRING, decrypter.decryptString(objNum, objGen, sb.toString()));
 }
コード例 #2
0
  /**
   * Reads a ( character string ). The initial ( has already been read; reading continues until
   * the *balanced* ) appears.
   *
   * <p>Section 3.2.3 of PDF Reference version 1.7 defines the format of String objects. Regarding
   * literal strings:
   *
   * <blockquote>
   *
   * Within a literal string, the backslash (\) is used as an escape character for various purposes,
   * such as to include newline characters, nonprinting ASCII characters, unbalanced parentheses, or
   * the backslash character itself in the string. The character immediately following the backslash
   * determines its precise interpretation (see Table 3.2). If the character following the backslash
   * is not one of those shown in the table, the backslash is ignored.
   *
   * </blockquote>
   *
   * <p>This only reads 8 bit basic character 'strings' so as to avoid a text string interpretation
   * when one is not desired (e.g., for byte strings, as used by the decryption mechanism). To keep
   * that guarantee, the value of a \ddd octal escape is reduced to its low 8 bits, i.e. high-order
   * overflow such as \777 is discarded. For an interpretation of a string returned from this
   * method, where the object type is defined as a 'text string' as per Section 3.8.1, Table 3.31
   * "PDF Data Types", {@link PDFStringUtil#asTextString} or {@link PDFObject#getTextStringValue()}
   * must be employed.
   *
   * @param objNum the object number of the object containing the string being read; negative
   *     only if the object number is unavailable, which should only happen if we're reading a
   *     string placed directly in the trailer
   * @param objGen the object generation of the object containing the string being read; negative
   *     only if the objNum is unavailable
   * @param decrypter the decrypter to use
   * @return a {@code PDFObject} of type {@code PDFObject.STRING} holding the result of passing the
   *     unescaped characters through {@code decrypter.decryptString}
   * @throws IOException not raised directly by this method body; presumably propagated from the
   *     decrypter (to be confirmed against {@code PDFDecrypter})
   */
  private PDFObject readLiteralString(int objNum, int objGen, PDFDecrypter decrypter)
      throws IOException {
    // Number of unescaped parentheses currently open; the already-consumed ( counts as one.
    int parencount = 1;
    // Local accumulator only, so the unsynchronized StringBuilder is sufficient.
    StringBuilder sb = new StringBuilder();

    while (parencount > 0) {
      // One byte per iteration, widened to 0..255. A negative c below means "emit nothing".
      int c = this.buf.get() & 0xFF;

      if (c == '(') {
        // nested, unescaped ( is part of the string and must be balanced later
        parencount++;
      } else if (c == ')') {
        parencount--;
        if (parencount == 0) {
          // the balancing ) terminates the string and is not part of it
          break;
        }
      } else if (c == '\\') {
        // Summary of the escape rules:
        //
        // \n \r \t \b \f  2-char sequences standing for their 1-char counterparts
        // \( \) \\        literal parenthesis / backslash
        // \ddd            1-3 octal digits giving a character code
        // \<EOL>          line continuation, contributes nothing to the string;
        //                 EOL may be CR, LF or CRLF
        // anything else   the backslash is ignored and the character used as is

        // grab the next character to see what we're dealing with
        c = this.buf.get() & 0xFF;
        if (c >= '0' && c < '8') {
          // \ddd form - one to three OCTAL digits
          int count = 0;
          int val = 0;
          while (c >= '0' && c < '8' && count < 3) {
            val = val * 8 + c - '0';
            c = this.buf.get() & 0xFF;
            count++;
          }
          // the loop always reads one byte past the escape; push it back
          this.buf.position(this.buf.position() - 1);
          // three octal digits can encode up to 0777; keep only the low 8 bits so the
          // result is always a single byte value (high-order overflow is ignored)
          c = val & 0xFF;
        } else {
          switch (c) {
            case 'n':
              c = '\n';
              break;
            case 'r':
              c = '\r';
              break;
            case 't':
              c = '\t';
              break;
            case 'b':
              c = '\b';
              break;
            case 'f':
              c = '\f';
              break;
            case '\r':
              // escaped CR to be ignored; also swallow an LF directly after it
              if ((this.buf.get() & 0xFF) != '\n') {
                // not an LF, leave it for the next iteration
                this.buf.position(this.buf.position() - 1);
              }
              c = -1;
              break;
            case '\n':
              // escaped LF to be ignored
              c = -1;
              break;
            default:
              // either one of ( ) \ which is taken literally, or an unknown
              // escape where the backslash is simply dropped
              break;
          }
        }
      }

      if (c >= 0) {
        sb.append((char) c);
      }
    }
    return new PDFObject(
        this, PDFObject.STRING, decrypter.decryptString(objNum, objGen, sb.toString()));
  }