Ejemplo n.º 1
0
  /**
   * Reads a name-tree style value and stores its raw bytes on {@code pdfObject}.
   *
   * <p>The value is located by scanning forward from {@code raw[i]} until an array start
   * ({@code [}), a string start ({@code (}) or a digit is found. A digit is treated as the start
   * of an indirect reference ({@code n g R}); in that case, unless {@code ignoreRecursion} is
   * set, the referenced object is loaded through {@code objectReader} and the value is taken
   * from there. The bytes from the opening bracket to the matching closing bracket (inclusive)
   * are decrypted when a decryption object is available and the object is not the Encrypt
   * dictionary, then stored with {@code pdfObject.setTextStreamValue(PDFkeyInt, ...)}.
   *
   * @param pdfObject object that receives the value
   * @param i index in {@code raw} at which to start scanning
   * @param raw bytes currently being parsed
   * @param length value returned when the referenced data is not yet available
   * @param ignoreRecursion when true nothing is loaded or stored; only the cursor is advanced
   * @param PDFkeyInt key under which the value is stored
   * @param objectReader used to load indirect objects and to obtain the decryption object
   * @return for an indirect reference, the index of the terminating {@code R} in {@code raw} (or
   *     {@code length} if the referenced data is not loaded yet); for a direct value, the index
   *     of its closing bracket, or {@code raw.length} if the value is unterminated
   * @throws RuntimeException if an indirect reference is not terminated by {@code R}
   */
  public static int setNameTreeValue(
      final PdfObject pdfObject,
      int i,
      final byte[] raw,
      final int length,
      final boolean ignoreRecursion,
      final int PDFkeyInt,
      final PdfFileReader objectReader) {

    boolean isRef = false;

    // move to start
    while (raw[i] != '[') { // can be number as well

      if (raw[i] == '(') { // allow for W (7) - a direct string, so isRef stays false
        break;
      }

      // allow for number as in refer 9 0 R
      if (raw[i] >= '0' && raw[i] <= '9') {
        isRef = true;
        break;
      }

      i++;
    }

    // allow for direct or indirect
    byte[] data = raw;

    int start = i, j = i;

    int count = 0;

    // read ref data and slot in
    if (isRef) {
      // readRefFromStream yields {object number, generation, index following the two numbers}
      final int[] values = StreamReaderUtils.readRefFromStream(raw, i);

      i = values[2];
      final int generation = values[1];
      final int number = values[0];

      if (raw[i] != 82) // we are expecting R to end ref
      {
        throw new RuntimeException(
            "3. Unexpected value in file "
                + raw[i]
                + " - please send to IDRsolutions for analysis");
      }

      if (!ignoreRecursion) {

        // read the Dictionary data
        data =
            objectReader.readObjectAsByteArray(
                pdfObject, objectReader.isCompressed(number, generation), number, generation);

        // allow for data in Linear object not yet loaded
        if (data == null) {
          pdfObject.setFullyResolved(false);

          if (debugFastCode) {
            System.out.println(padding + "Data not yet loaded");
          }

          i = length;
          return i;
        }

        // lose obj at start
        // NOTE(review): because the tests are joined with &&, this stops as soon as ANY one of
        // them matches rather than on the full "obj" sequence - left as-is, confirm intent
        j = 3;
        while (data[j - 1] != 106
            && data[j - 2] != 98
            && data[j - 3] != 111
            && data[j - 3] != '<') {
          j++;
        }

        j = StreamReaderUtils.skipSpaces(data, j);

        // reset pointer
        start = j;
      }
    }

    // move to end: track bracket depth until the bracket opened at start is closed again
    while (j < data.length) {

      if (data[j] == '[' || data[j] == '(') {
        count++;
      } else if (data[j] == ']' || data[j] == ')') {
        count--;
      }

      if (count == 0) {
        break;
      }

      j++;
    }

    if (!ignoreRecursion) {
      // If the value is never closed the scan above leaves j == data.length; clamp the last
      // copied index so the copy cannot run one byte past the end of data.
      final int last = Math.min(j, data.length - 1);
      final int stringLength = last - start + 1;
      byte[] newString = new byte[stringLength];

      System.arraycopy(data, start, newString, 0, stringLength);
      if (pdfObject.getObjectType() != PdfDictionary.Encrypt) {
        final DecryptionFactory decryption = objectReader.getDecryptionObject();
        if (decryption != null) {
          try {
            newString =
                decryption.decrypt(
                    newString, pdfObject.getObjectRefAsString(), false, null, false, false);
          } catch (final PdfSecurityException e) {
            // best effort: keep the undecrypted bytes and log the failure
            LogWriter.writeLog("Exception: " + e.getMessage());
          }
        }
      }

      pdfObject.setTextStreamValue(PDFkeyInt, newString);

      if (debugFastCode) {
        System.out.println(padding + "name=" + new String(newString) + " set in " + pdfObject);
      }
    }

    // roll on (for a reference the cursor already sits on the R in raw)
    if (!isRef) {
      i = j;
    }
    return i;
  }
Ejemplo n.º 2
0
  /**
   * Reads a name or string value and stores it on {@code pdfObject} with {@code setName}.
   *
   * <p>The value may appear directly in {@code raw} as {@code /Name}, {@code (string)} or
   * {@code <...>}, inside an array such as {@code [ /A /B ]}, or as an indirect reference
   * ({@code n g R}) whose target is loaded through {@code objectReader}. Values inside an array
   * are flattened into a single byte sequence with the {@code /} separators removed.
   *
   * @param pdfObject object that receives the value
   * @param i index in {@code raw} at which to start
   * @param raw bytes currently being parsed
   * @param isMap when true the value is stored under {@code PDFkey} as a text string, otherwise
   *     under {@code PDFkeyInt} as raw bytes
   * @param PDFkey key used when {@code isMap} is true
   * @param PDFkeyInt key used when {@code isMap} is false
   * @param objectReader used to load indirect objects
   * @return the new cursor position: one before the end of the value for a direct value, the
   *     index of the terminating {@code R} for an indirect value, {@code raw.length} if the
   *     referenced data is not loaded yet, or the index of the first {@code /} when the name is
   *     empty (as in {@code /Intent//Filter})
   * @throws RuntimeException if an indirect reference is not terminated by {@code R}
   */
  public static int setNameStringValue(
      final PdfObject pdfObject,
      int i,
      final byte[] raw,
      final boolean isMap,
      final Object PDFkey,
      final int PDFkeyInt,
      final PdfFileReader objectReader) {

    byte[] stringBytes;

    // move cursor to end of last command if needed
    while (raw[i] != 10
        && raw[i] != 13
        && raw[i] != 32
        && raw[i] != 47
        && raw[i] != '('
        && raw[i] != '<') {
      i++;
    }

    i = StreamReaderUtils.skipSpaces(raw, i);

    // work out if direct (ie /String or read ref 27 0 R
    int j2 = i;
    byte[] arrayData = raw;

    boolean isIndirect = raw[i] != 47 && raw[i] != 40 && raw[i] != 60; // Some /NAME values start (

    final boolean startsWithBrace = raw[i] == 40;

    // delete
    // @speed - lose this code once Filters done properly
    /*
     * just check its not /Filter [/FlateDecode ] or [] or [ /ASCII85Decode /FlateDecode ]
     * by checking next valid char not /
     */
    boolean isInsideArray = false;
    if (isIndirect) {
      int aa = i + 1;
      aa = StreamReaderUtils.skipSpaces(raw, aa);

      if (raw[aa] == 47 || raw[aa] == ']') {
        isIndirect = false;
        i = aa + 1;
        isInsideArray = true;
      }
    }

    if (isIndirect) { // its in another object so we need to fetch

      // readRefFromStream yields {object number, generation, index following the two numbers}
      final int[] values = StreamReaderUtils.readRefFromStream(raw, i);
      final int ref = values[0];
      final int generation = values[1];
      i = values[2];

      if (raw[i] != 82) { // we are expecting R to end ref
        throw new RuntimeException(
            padding + "2. Unexpected value in file - please send to IDRsolutions for analysis");
      }

      // read the Dictionary data
      arrayData =
          objectReader.readObjectAsByteArray(
              pdfObject, objectReader.isCompressed(ref, generation), ref, generation);

      // allow for data in Linear object not yet loaded
      if (arrayData == null) {
        pdfObject.setFullyResolved(false);

        if (debugFastCode) {
          System.out.println(padding + "Data not yet loaded");
        }

        return raw.length;
      }

      // lose obj at start and roll onto /
      if (arrayData[0] == 47) {
        j2 = 0;
      } else {
        j2 = 3;

        while (arrayData[j2] != 47) {
          j2++;
        }
      }
    }

    // lose /
    j2++;

    // allow for no value with /Intent//Filter
    if (arrayData[j2] == 47) {
      return j2 - 1;
    }

    int end = j2 + 1;

    if (isInsideArray) { // values inside []

      // move cursor to start of text
      j2 = StreamReaderUtils.skipSpacesOrOtherCharacter(arrayData, j2, 47);

      int slashes = 0;

      // count chars up to the closing ] (or the end of the data). The bound is tested before
      // each read so a value that ends exactly at the end of the buffer cannot overrun it.
      byte lastChar = 0;
      while (end < arrayData.length && arrayData[end] != ']') {

        if (arrayData[end] == 47
            && (lastChar == 32 || lastChar == 10 || lastChar == 13)) // count the / if gap before
        {
          slashes++;
        }

        lastChar = arrayData[end];
        end++;
      }

      // set value and ensure space gap
      final int charCount = end - slashes;
      int ptr = 0;
      stringBytes = new byte[charCount - j2];

      byte nextChar, previous = 0;
      for (int ii = j2; ii < charCount; ii++) {
        nextChar = arrayData[ii];
        if (nextChar == 47) {
          // a / is never copied; it becomes a space only if no whitespace already precedes it
          if (previous != 32 && previous != 10 && previous != 13) {
            stringBytes[ptr] = 32;
            ptr++;
          }
        } else {
          stringBytes[ptr] = nextChar;
          ptr++;
        }

        previous = nextChar;
      }
    } else { // its in data stream directly or (string)

      // count chars up to the terminator (or the end of the data); bound tested before each read
      while (end < arrayData.length) {

        if (startsWithBrace) {
          // a (string) ends at the first unescaped )
          if (arrayData[end] == ')' && !ObjectUtils.isEscaped(arrayData, end)) {
            break;
          }
        } else if (arrayData[end] == 32
            || arrayData[end] == 10
            || arrayData[end] == 13
            || arrayData[end] == 47
            || arrayData[end] == 62) {
          break;
        }

        end++;
      }

      // set value
      final int charCount = end - j2;
      stringBytes = new byte[charCount];
      System.arraycopy(arrayData, j2, stringBytes, 0, charCount);
    }

    if (isMap) {
      pdfObject.setName(PDFkey, StringUtils.getTextString(stringBytes, false));
    } else {
      pdfObject.setName(PDFkeyInt, stringBytes);
    }

    if (debugFastCode) {
      System.out.println(
          padding + "String set as =" + new String(stringBytes) + "< written to " + pdfObject);
    }

    // put cursor in correct place (already there if ref)
    if (!isIndirect) {
      i = end - 1;
    }

    return i;
  }
  /**
   * Reads the reference table(s) from the file so objects can be located, and reads the
   * trailers.
   *
   * <p>Starting at {@code pointer}, each pass reads a buffer via {@code Trailer.readTrailer},
   * locates the {@code trailer} keyword, decodes the trailer dictionary and, when the buffer
   * starts with {@code xref}, reads its entries into {@code offset}. The chain is followed
   * through the trailer's {@code Prev} value (or its {@code XRefStm} value when present) until
   * no further table is indicated. Root, Encrypt, ID and Info are taken only from the first
   * trailer processed. If a buffer does not start with {@code xref}, or no usable root is found,
   * the file is indexed manually via {@code BrokenRefTable.findOffsets}.
   *
   * @param rootObj root object found so far, or {@code null} if none yet
   * @param pointer file offset of the first table to read
   * @param eof passed through to the trailer and xref readers
   * @param currentPdfFile reader used to decode dictionaries and objects
   * @return the root object
   * @throws PdfException if the trailer data cannot be read
   */
  private PdfObject readLegacyReferenceTable(
      PdfObject rootObj, int pointer, final int eof, final PdfFileReader currentPdfFile)
      throws PdfException {

    int endTable, current = 0; // current object number
    byte[] Bytes;
    int bufSize = 1024;

    // read and decode 1 or more trailers
    while (true) {

      try {

        // allow for pointer outside file
        Bytes = Trailer.readTrailer(bufSize, pointer, eof, pdf_datafile);

      } catch (final Exception e) {

        try {
          closeFile();
        } catch (final IOException e1) {
          if (LogWriter.isOutput()) {
            LogWriter.writeLog("Exception " + e + " closing file " + e1);
          }
        }
        throw new PdfException("Exception " + e + " reading trailer");
      }

      if (Bytes == null) // safety catch
      {
        break;
      }

      // get trailer
      int i = 0;

      final int maxLen = Bytes.length;

      // Look for the trailer keyword. The scan stops early enough that all 7 bytes of the
      // keyword lie inside the buffer; a partial match at the very end is treated as
      // "not found" instead of reading past the end of the array.
      final int lastKeywordStart = maxLen - 7;
      boolean trailerFound = false;
      while (i <= lastKeywordStart) {
        if (Bytes[i] == 116
            && Bytes[i + 1] == 114
            && Bytes[i + 2] == 97
            && Bytes[i + 3] == 105
            && Bytes[i + 4] == 108
            && Bytes[i + 5] == 101
            && Bytes[i + 6] == 114) {
          trailerFound = true;
          break;
        }

        i++;
      }

      if (!trailerFound) {
        i = maxLen;
      }

      // save endtable position for later
      endTable = i;

      if (i == Bytes.length) {
        break;
      }

      // move to beyond <<
      // NOTE(review): with && this stops on the first '<'; the i++ below then lands on the
      // second one. Left unchanged because the code that follows relies on that position.
      while (Bytes[i] != 60 && Bytes[i - 1] != 60) {
        i++;
      }

      i++;
      final PdfObject pdfObject = new CompressedObject("1 0 R");
      Dictionary.readDictionary(pdfObject, i, Bytes, -1, true, currentPdfFile, false);

      // move to beyond >>, tracking nested << >> pairs and skipping over [ ] arrays
      int level = 0;
      while (true) {

        if (Bytes[i] == 60 && Bytes[i - 1] == 60) {
          level++;
          i++;
        } else if (Bytes[i] == '[') {
          i++;
          while (Bytes[i] != ']') {
            i++;
            if (i == Bytes.length) {
              break;
            }
          }
        } else if (Bytes[i] == 62 && Bytes[i - 1] == 62) {
          level--;
          i++;
        }

        if (level == 0) {
          break;
        }

        i++;
      }

      // handle optional XRefStm
      final int XRefStm = pdfObject.getInt(PdfDictionary.XRefStm);

      if (XRefStm != -1) {
        pointer = XRefStm;
      } else { // usual way

        boolean hasRef = true;

        // handle spaces and comments
        while (Bytes[i] == 10 || Bytes[i] == 13) {
          i++;
        }

        while (Bytes[i] == '%') {
          while (Bytes[i] != 10) {

            i++;
          }
          i++;
        }
        // fix for oneiderapartnerbrochure_web_1371798737.pdf

        // look for xref as end of startref
        // NOTE(review): with && this stops as soon as any single byte matches (in practice
        // the first 't' of startxref); the i += 8 below depends on that, so it is unchanged.
        while (Bytes[i] != 116
            && Bytes[i + 1] != 120
            && Bytes[i + 2] != 114
            && Bytes[i + 3] != 101
            && Bytes[i + 4] != 102) {

          // an object starts before any startxref was seen, so there is no pointer to read
          if (Bytes[i] == 'o' && Bytes[i + 1] == 'b' && Bytes[i + 2] == 'j') {
            hasRef = false;
            break;
          }
          i++;
        }

        if (hasRef) {

          i += 8;
          // move to start of value ignoring spaces or returns
          while ((i < maxLen) && (Bytes[i] == 10 || Bytes[i] == 32 || Bytes[i] == 13)) {
            i++;
          }

          final int s = i;

          // allow for characters between xref and startref
          while (i < maxLen && Bytes[i] != 10 && Bytes[i] != 32 && Bytes[i] != 13) {
            i++;
          }

          // convert xref to string to get pointer
          if (s != i) {
            pointer = NumberUtils.parseInt(s, i, Bytes);
          }
        }
      }

      i = 0;

      // allow for bum data at start
      while (Bytes[i] == 13 || Bytes[i] == 10 || Bytes[i] == 9) {
        i++;
      }

      if (pointer == -1) {
        if (LogWriter.isOutput()) {
          LogWriter.writeLog("No startRef");
        }

        // now read the objects for the trailers
      } else if (Bytes[i] == 120
          && Bytes[i + 1] == 114
          && Bytes[i + 2] == 101
          && Bytes[i + 3] == 102) { // make sure starts xref

        i = 5;

        // move to start of value ignoring spaces or returns
        while (Bytes[i] == 10 || Bytes[i] == 32 || Bytes[i] == 13) {
          i++;
        }

        current = offset.readXRefs(current, Bytes, endTable, i, eof, pdf_datafile);

        // now process trailer values - only first set of table values for root, encryption
        // and info
        if (rootObj == null) {

          rootObj = pdfObject.getDictionary(PdfDictionary.Root);

          encryptObj = pdfObject.getDictionary(PdfDictionary.Encrypt);
          if (encryptObj != null) {

            final byte[][] IDs = pdfObject.getStringArray(PdfDictionary.ID);
            if (IDs != null && this.ID == null) {
              // only the first encountered ID should be used as a fileID for decryption
              this.ID = IDs[0];
            }
          }

          infoObject = pdfObject.getDictionary(PdfDictionary.Info);
        }

        // make sure first values used if several tables and code for prev
        pointer = pdfObject.getInt(PdfDictionary.Prev);

        // see if other trailers
        if (pointer != -1 && pointer < this.eof) {
          // reset values for loop
          bufSize = 1024;

          // track ref table so we can work out object length
          offset.addXref(pointer);

        } else // reset if fails second test above
        {
          pointer = -1;
        }

      } else {
        // buffer does not start with xref: treat the table as broken and index manually
        pointer = -1;

        // needs to be read to pick up potential /Pages value
        //noinspection ObjectAllocationInLoop
        rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset));
        currentPdfFile.readObject(rootObj);

        offset.setRefTableInvalid(true);
      }
      if (pointer == -1) {
        break;
      }
    }

    if (encryptObj == null
        && rootObj != null) { // manual check for broken file (ignore if Encrypted)

      int type = -1;

      // remember the unresolved state so it can be restored if resolving fails
      int status = rootObj.getStatus();
      byte[] data = rootObj.getUnresolvedData();

      try {

        final ObjectDecoder objectDecoder = new ObjectDecoder(currentPdfFile);
        objectDecoder.checkResolved(rootObj);

        type = rootObj.getParameterConstant(PdfDictionary.Type);

      } catch (Exception e) { // we need to ignore so just catch, put back as was and log

        rootObj.setStatus(status);
        rootObj.setUnresolvedData(data, status);
        if (LogWriter.isOutput()) {
          LogWriter.writeLog("[PDF] Exception reading type on root object " + e);
        }
      }

      // something gone wrong so manually index
      if (type == PdfDictionary.Font) { // see 21153 - ref table in wrong order
        rootObj = null; // will reset in code at end
      }
    }

    // something gone wrong so manually index
    if (rootObj == null) { // see 21382

      offset.clear();
      offset.reuse();

      // needs to be read to pick up potential /Pages value
      //noinspection ObjectAllocationInLoop
      rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset));
      currentPdfFile.readObject(rootObj);

      offset.setRefTableInvalid(true);
    }

    return rootObj;
  }
  /**
   * Reads a 1.5 compression stream reference table.
   *
   * <p>For each table in the chain the object at {@code pointer} is read, its name is recovered
   * from the bytes preceding the {@code pattern} marker, its dictionary is decoded, and the
   * decoded stream is turned into offsets using the {@code W} field sizes and the optional
   * {@code Index} ranges. Root, Encrypt, ID and Info are taken only from the first table
   * processed. The chain is followed through {@code Prev} unless a linearized object was
   * supplied; a previous table that is not a compressed stream is handed over to
   * {@code readLegacyReferenceTable}.
   *
   * @param rootObj root object found so far, or {@code null} if none yet
   * @param pointer file offset of the first stream to read, or -1 for none
   * @param currentPdfFile reader used to decode dictionaries and streams
   * @param objectReader reader used to fetch the raw object data
   * @param linearObj linearization object, or {@code null}; when non-null only one table is read
   * @return the root object (may be {@code null} if none was found)
   * @throws PdfException passed on from the underlying readers
   */
  private PdfObject readCompressedStream(
      PdfObject rootObj,
      int pointer,
      final PdfFileReader currentPdfFile,
      final ObjectReader objectReader,
      final PdfObject linearObj)
      throws PdfException {

    while (pointer != -1) {

      // get values to read stream ref
      movePointer(pointer);

      final byte[] raw = objectReader.readObjectData(-1, null);

      // read the object name from the start
      final StringBuilder objectName = new StringBuilder();
      char current1, last = ' ';
      int matched = 0, i1 = 0;
      while (i1 < raw.length) {
        current1 = (char) raw[i1];

        // treat returns same as spaces
        if (current1 == 10 || current1 == 13) {
          current1 = ' ';
        }

        if (current1 == ' ' && last == ' ') { // lose duplicate or spaces
          matched = 0;
        } else if (current1 == pattern.charAt(matched)) { // looking for obj at end
          matched++;
        } else {
          matched = 0;
          objectName.append(current1);
        }
        if (matched == 3) {
          break;
        }
        last = current1;
        i1++;
      }

      // add end and put into Map
      objectName.append('R');

      final PdfObject pdfObject = new CompressedObject(objectName.toString());
      pdfObject.setCompressedStream(true);
      final ObjectDecoder objectDecoder = new ObjectDecoder(currentPdfFile);
      objectDecoder.readDictionaryAsObject(pdfObject, 0, raw);

      // read the field sizes
      final int[] fieldSizes = pdfObject.getIntArray(PdfDictionary.W);

      // read the xrefs stream
      byte[] xrefs = pdfObject.getDecodedStream();

      // if encr
      if (xrefs == null) {
        xrefs = currentPdfFile.readStream(pdfObject, true, true, false, false, true, null);
      }

      final int[] Index = pdfObject.getIntArray(PdfDictionary.Index);
      if (Index == null) { // single set of values

        CompressedObjects.readCompressedOffsets(
            0, 0, pdfObject.getInt(PdfDictionary.Size), fieldSizes, xrefs, offset, pdf_datafile);

      } else { // pairs of values in Index[] array
        final int count = Index.length;
        int pntr = 0;

        // Only complete (first, second) pairs are read; a trailing unpaired value in a
        // malformed Index array is ignored instead of indexing past the end of the array.
        for (int aa = 0; aa + 1 < count; aa += 2) {

          pntr =
              CompressedObjects.readCompressedOffsets(
                  pntr, Index[aa], Index[aa + 1], fieldSizes, xrefs, offset, pdf_datafile);
        }
      }

      // now process trailer values - only first set of table values for root, encryption and
      // info
      if (rootObj == null) {

        rootObj = pdfObject.getDictionary(PdfDictionary.Root);

        // handle encryption
        encryptObj = pdfObject.getDictionary(PdfDictionary.Encrypt);

        if (encryptObj != null) {

          final byte[][] IDs = pdfObject.getStringArray(PdfDictionary.ID);
          if (IDs != null && this.ID == null) {
            // only the first encountered ID should be used as a fileID for decryption
            this.ID = IDs[0];
          }
        }

        infoObject = pdfObject.getDictionary(PdfDictionary.Info);
      }

      // make sure first values used if several tables and code for prev so long as not linearized
      // may need adjusting as more examples turn up
      if (linearObj != null) {
        pointer = -1;
      } else {
        pointer = pdfObject.getInt(PdfDictionary.Prev);

        // a non-compressed object table can follow a compressed one so we need to allow for this
        // NOTE(review): when there is no Prev value pointer is -1 here and is still passed to
        // isCompressedStream - confirm that helper copes with -1
        if (!isCompressedStream(pointer, (int) eof)) {
          return readLegacyReferenceTable(rootObj, pointer, (int) eof, currentPdfFile);
        }
      }
    }

    return rootObj;
  }
  /**
   * Reads the start of the reference table to see if it is the new 1.5 type or a traditional
   * xref, then delegates to the matching reader.
   *
   * <p>Without a linearized object the start position comes from {@code readFirstStartRef()}.
   * With one, the in-memory buffer is scanned for the first {@code xref} keyword (traditional
   * table) or {@code XRef} marker (compressed stream; the table then starts at the first
   * non-whitespace byte after the preceding {@code endobj}). If the resulting position is 0 or
   * not before the end of the file, the file is indexed manually via
   * {@code BrokenRefTable.findOffsets}.
   *
   * @param linearObj linearization object, or {@code null} for a non-linearized read
   * @param currentPdfFile reader used to decode objects
   * @param objectReader reader used to fetch raw object data for compressed tables
   * @return the root object
   * @throws PdfException if the manual scan fails or a delegated reader fails
   */
  public final PdfObject readReferenceTable(
      final PdfObject linearObj,
      final PdfFileReader currentPdfFile,
      final ObjectReader objectReader)
      throws PdfException {

    int pointer = -1;
    final int eof = (int) this.eof;

    boolean islinearizedCompressed = false;

    if (linearObj == null) {
      pointer = readFirstStartRef();
    } else { // find at start of Linearized
      final byte[] data = pdf_datafile.getPdfBuffer();

      final int count = data.length;
      int ptr = 5;
      for (int i = 0; i < count; i++) {

        // Each keyword test first checks that the whole keyword fits inside the buffer, so a
        // partial keyword at the very end cannot read past the end of the array.
        final boolean roomForFour = i + 3 < count;

        // track start of this object (needed for compressed)
        if (i + 5 < count
            && data[i] == 'e'
            && data[i + 1] == 'n'
            && data[i + 2] == 'd'
            && data[i + 3] == 'o'
            && data[i + 4] == 'b'
            && data[i + 5] == 'j') {
          ptr = i + 6;
        }

        if (roomForFour
            && data[i] == 'x'
            && data[i + 1] == 'r'
            && data[i + 2] == 'e'
            && data[i + 3] == 'f') {
          pointer = i;
          i = count; // end the scan
        } else if (roomForFour
            && data[i] == 'X'
            && data[i + 1] == 'R'
            && data[i + 2] == 'e'
            && data[i + 3] == 'f') {

          islinearizedCompressed = true;

          // the stream object starts after the last endobj seen; skip whitespace before it
          pointer = ptr;
          while (pointer < count
              && (data[pointer] == 10 || data[pointer] == 13 || data[pointer] == 32)) {
            pointer++;
          }

          i = count; // end the scan
        }
      }
    }

    offset.addXref(pointer);

    PdfObject rootObj = null;

    if (pointer >= eof || pointer == 0) {

      if (LogWriter.isOutput()) {
        LogWriter.writeLog("Pointer not if file - trying to manually find startref");
      }

      offset.setRefTableInvalid(true);

      try {
        rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset));
      } catch (Error err) {
        throw new PdfException(err.getMessage() + " attempting to manually scan file for objects");
      }

      currentPdfFile.readObject(rootObj);
      return rootObj;

    } else if (islinearizedCompressed || isCompressedStream(pointer, eof)) {
      return readCompressedStream(rootObj, pointer, currentPdfFile, objectReader, linearObj);
    } else {
      return readLegacyReferenceTable(rootObj, pointer, eof, currentPdfFile);
    }
  }