コード例 #1
0
  /** read reference table from file so we can locate objects in pdf file and read the trailers */
  private PdfObject readLegacyReferenceTable(
      PdfObject rootObj, int pointer, final int eof, final PdfFileReader currentPdfFile)
      throws PdfException {

    int endTable, current = 0; // current object number
    byte[] Bytes;
    int bufSize = 1024;

    /** read and decode 1 or more trailers */
    while (true) {

      try {

        // allow for pointer outside file
        Bytes = Trailer.readTrailer(bufSize, pointer, eof, pdf_datafile);

      } catch (final Exception e) {

        try {
          closeFile();
        } catch (final IOException e1) {
          if (LogWriter.isOutput()) {
            LogWriter.writeLog("Exception " + e + " closing file " + e1);
          }
        }
        throw new PdfException("Exception " + e + " reading trailer");
      }

      if (Bytes == null) // safety catch
      {
        break;
      }

      /** get trailer */
      int i = 0;

      final int maxLen = Bytes.length;

      // for(int a=0;a<100;a++)
      //	System.out.println((char)Bytes[i+a]);
      while (i < maxLen) { // look for trailer keyword
        if (Bytes[i] == 116
            && Bytes[i + 1] == 114
            && Bytes[i + 2] == 97
            && Bytes[i + 3] == 105
            && Bytes[i + 4] == 108
            && Bytes[i + 5] == 101
            && Bytes[i + 6] == 114) {
          break;
        }

        i++;
      }

      // save endtable position for later
      endTable = i;

      if (i == Bytes.length) {
        break;
      }

      // move to beyond <<
      while (Bytes[i] != 60 && Bytes[i - 1] != 60) {
        i++;
      }

      i++;
      final PdfObject pdfObject = new CompressedObject("1 0 R");
      Dictionary.readDictionary(pdfObject, i, Bytes, -1, true, currentPdfFile, false);

      // move to beyond >>
      int level = 0;
      while (true) {

        if (Bytes[i] == 60 && Bytes[i - 1] == 60) {
          level++;
          i++;
        } else if (Bytes[i] == '[') {
          i++;
          while (Bytes[i] != ']') {
            i++;
            if (i == Bytes.length) {
              break;
            }
          }
        } else if (Bytes[i] == 62 && Bytes[i - 1] == 62) {
          level--;
          i++;
        }

        if (level == 0) {
          break;
        }

        i++;
      }

      // handle optional XRefStm
      final int XRefStm = pdfObject.getInt(PdfDictionary.XRefStm);

      if (XRefStm != -1) {
        pointer = XRefStm;
      } else { // usual way

        boolean hasRef = true;

        /** handle spaces and comments */
        while (Bytes[i] == 10 || Bytes[i] == 13) {
          i++;
        }

        while (Bytes[i] == '%') {
          while (Bytes[i] != 10) {

            i++;
          }
          i++;
        }
        /* fix for /Users/markee/Downloads/oneiderapartnerbrochure_web_1371798737.pdf
        /**/

        // look for xref as end of startref
        while (Bytes[i] != 116
            && Bytes[i + 1] != 120
            && Bytes[i + 2] != 114
            && Bytes[i + 3] != 101
            && Bytes[i + 4] != 102) {

          if (Bytes[i] == 'o' && Bytes[i + 1] == 'b' && Bytes[i + 2] == 'j') {
            hasRef = false;
            break;
          }
          i++;
        }

        if (hasRef) {

          i += 8;
          // move to start of value ignoring spaces or returns
          while ((i < maxLen) && (Bytes[i] == 10 || Bytes[i] == 32 || Bytes[i] == 13)) {
            i++;
          }

          final int s = i;

          // allow for characters between xref and startref
          while (i < maxLen && Bytes[i] != 10 && Bytes[i] != 32 && Bytes[i] != 13) {
            i++;
          }

          /** convert xref to string to get pointer */
          if (s != i) {
            pointer = NumberUtils.parseInt(s, i, Bytes);
          }
        }
      }

      i = 0;

      // allow for bum data at start
      while (Bytes[i] == 13 || Bytes[i] == 10 || Bytes[i] == 9) {
        i++;
      }

      if (pointer == -1) {
        if (LogWriter.isOutput()) {
          LogWriter.writeLog("No startRef");
        }

        /** now read the objects for the trailers */
      } else if (Bytes[i] == 120
          && Bytes[i + 1] == 114
          && Bytes[i + 2] == 101
          && Bytes[i + 3] == 102) { // make sure starts xref

        i = 5;

        // move to start of value ignoring spaces or returns
        while (Bytes[i] == 10 || Bytes[i] == 32 || Bytes[i] == 13) {
          i++;
        }

        current = offset.readXRefs(current, Bytes, endTable, i, eof, pdf_datafile);

        /**
         * now process trailer values - only first set of table values for root, encryption and info
         */
        if (rootObj == null) {

          rootObj = pdfObject.getDictionary(PdfDictionary.Root);

          encryptObj = pdfObject.getDictionary(PdfDictionary.Encrypt);
          if (encryptObj != null) {

            final byte[][] IDs = pdfObject.getStringArray(PdfDictionary.ID);
            if (IDs != null && this.ID == null) {
              // only the first encountered ID should be used as a fileID for decryption
              this.ID = IDs[0];
            }
          }

          infoObject = pdfObject.getDictionary(PdfDictionary.Info);
        }

        // make sure first values used if several tables and code for prev
        pointer = pdfObject.getInt(PdfDictionary.Prev);

        // see if other trailers
        if (pointer != -1 && pointer < this.eof) {
          // reset values for loop
          bufSize = 1024;

          // track ref table so we can work out object length
          offset.addXref(pointer);

        } else // reset if fails second test above
        {
          pointer = -1;
        }

      } else {
        pointer = -1;

        // needs to be read to pick up potential /Pages value
        //noinspection ObjectAllocationInLoop
        rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset));
        currentPdfFile.readObject(rootObj);

        offset.setRefTableInvalid(true);
      }
      if (pointer == -1) {
        break;
      }
    }

    if (encryptObj == null
        && rootObj != null) { // manual check for broken file (ignore if Encrypted)

      int type = -1;

      int status = rootObj.getStatus();
      byte[] data = rootObj.getUnresolvedData();

      try {

        final ObjectDecoder objectDecoder = new ObjectDecoder(currentPdfFile);
        objectDecoder.checkResolved(rootObj);

        type = rootObj.getParameterConstant(PdfDictionary.Type);

      } catch (Exception e) { // we need to ignore so just catch, put back as was and log

        rootObj.setStatus(status);
        rootObj.setUnresolvedData(data, status);
        if (LogWriter.isOutput()) {
          LogWriter.writeLog("[PDF] Exception reading type on root object " + e);
        }
      }

      // something gone wrong so manually index
      if (type == PdfDictionary.Font) { // see 21153 - ref table in wrong order
        rootObj = null; // /will reset in code at end
      }
    }

    // something gone wrong so manually index
    if (rootObj == null) { // see 21382

      offset.clear();
      offset.reuse();

      // needs to be read to pick up potential /Pages value
      //noinspection ObjectAllocationInLoop
      rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset));
      currentPdfFile.readObject(rootObj);

      offset.setRefTableInvalid(true);
    }

    return rootObj;
  }
コード例 #2
0
  /** read first start ref from last 1024 bytes */
  private int readFirstStartRef() throws PdfException {

    // reset flag
    offset.setRefTableInvalid(false);

    int pointer = -1;
    int i = 1019;
    final StringBuilder startRef = new StringBuilder(10);

    /** move to end of file and read last 1024 bytes */
    final int block = 1024;
    byte[] lastBytes = new byte[block];
    long end;

    /** set endpoint, losing null chars and anything before EOF */
    final int[] EndOfFileMarker = {37, 37, 69, 79};
    int valReached = 3;
    boolean EOFFound = false;
    try {
      end = eof;

      /** lose nulls and other trash from end of file */
      final int bufSize = 255;
      while (true) {
        final byte[] buffer = getBytes(end - bufSize, bufSize);

        int offset = 0;

        for (int ii = bufSize - 1; ii > -1; ii--) {

          // see if we can decrement EOF tracker or restart check
          if (!EOFFound) {
            valReached = 3;
          }

          if (buffer[ii] == EndOfFileMarker[valReached]) {
            valReached--;
            EOFFound = true;
          } else {
            EOFFound = false;
          }

          // move to next byte
          offset--;

          if (valReached < 0) {
            ii = -1;
          }
        }

        // exit if found values on loop
        if (valReached < 0) {
          end -= offset;
          break;
        } else {
          end -= bufSize;
        }

        // allow for no eof
        if (end < 0) {
          end = eof;
          break;
        }
      }

      // end=end+bufSize;

      // allow for very small file
      int count = (int) (end - block);

      if (count < 0) {
        count = 0;
        final int size = (int) eof;
        lastBytes = new byte[size];
        i = size + 3; // force reset below
      }

      lastBytes = getBytes(count, lastBytes.length);

    } catch (final Exception e) {
      if (LogWriter.isOutput()) {
        LogWriter.writeLog("Exception " + e + " reading last 1024 bytes");
      }

      throw new PdfException(e + " reading last 1024 bytes");
    }

    //		for(int ii=0;ii<lastBytes.length;ii++){
    //		System.out.print((char)lastBytes[ii]);
    //		}
    //		System.out.println();

    // look for tref as end of startxref
    final int fileSize = lastBytes.length;

    if (i > fileSize) {
      i = fileSize - 5;
    }

    while (i > -1) {

      // first check is because startref works as well a startxref !!
      if (((lastBytes[i] == 116 && lastBytes[i + 1] == 120)
              || (lastBytes[i] == 114 && lastBytes[i + 1] == 116))
          && (lastBytes[i + 2] == 114)
          && (lastBytes[i + 3] == 101)
          && (lastBytes[i + 4] == 102)) {
        break;
      }

      i--;
    }

    /** trap buggy files */
    if (i == -1) {
      try {
        closeFile();
      } catch (final IOException e1) {
        if (LogWriter.isOutput()) {
          LogWriter.writeLog("Exception " + e1 + " closing file");
        }
      }
      throw new PdfException("No Startxref found in last 1024 bytes ");
    }

    i += 5; // allow for word length

    // move to start of value ignoring spaces or returns
    while (i < 1024 && (lastBytes[i] == 10 || lastBytes[i] == 32 || lastBytes[i] == 13)) {
      i++;
    }

    // move to start of value ignoring spaces or returns
    while ((i < 1024) && (lastBytes[i] != 10) && (lastBytes[i] != 32) && (lastBytes[i] != 13)) {
      startRef.append((char) lastBytes[i]);
      i++;
    }

    /** convert xref to string to get pointer */
    if (startRef.length() > 0) {
      pointer = Integer.parseInt(startRef.toString());
    }

    if (pointer == -1) {
      if (LogWriter.isOutput()) {
        LogWriter.writeLog("No Startref found in last 1024 bytes ");
      }
      try {
        closeFile();
      } catch (final IOException e1) {
        if (LogWriter.isOutput()) {
          LogWriter.writeLog("Exception " + e1 + " closing file");
        }
      }
      throw new PdfException("No Startref found in last 1024 bytes ");
    }

    return pointer;
  }
コード例 #3
0
  /**
   * read reference table start to see if new 1.5 type or traditional xref
   *
   * @throws PdfException
   */
  public final PdfObject readReferenceTable(
      final PdfObject linearObj,
      final PdfFileReader currentPdfFile,
      final ObjectReader objectReader)
      throws PdfException {

    int pointer = -1;
    final int eof = (int) this.eof;

    boolean islinearizedCompressed = false;

    if (linearObj == null) {
      pointer = readFirstStartRef();
    } else { // find at start of Linearized
      final byte[] data = pdf_datafile.getPdfBuffer();

      final int count = data.length;
      int ptr = 5;
      for (int i = 0; i < count; i++) {

        // track start of this object (needed for compressed)
        if (data[i] == 'e'
            && data[i + 1] == 'n'
            && data[i + 2] == 'd'
            && data[i + 3] == 'o'
            && data[i + 4] == 'b'
            && data[i + 5] == 'j') {
          ptr = i + 6;
        }

        if (data[i] == 'x' && data[i + 1] == 'r' && data[i + 2] == 'e' && data[i + 3] == 'f') {
          pointer = i;
          i = count;
        } else if (data[i] == 'X'
            && data[i + 1] == 'R'
            && data[i + 2] == 'e'
            && data[i + 3] == 'f') {

          islinearizedCompressed = true;

          pointer = ptr;
          while (data[pointer] == 10 || data[pointer] == 13 || data[pointer] == 32) {
            pointer++;
          }

          i = count;
        }
      }
    }

    offset.addXref(pointer);

    PdfObject rootObj = null;

    if (pointer >= eof || pointer == 0) {

      if (LogWriter.isOutput()) {
        LogWriter.writeLog("Pointer not if file - trying to manually find startref");
      }

      offset.setRefTableInvalid(true);

      try {
        rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset));
      } catch (Error err) {
        throw new PdfException(err.getMessage() + " attempting to manually scan file for objects");
      }

      currentPdfFile.readObject(rootObj);
      return rootObj;

    } else if (islinearizedCompressed || isCompressedStream(pointer, eof)) {
      return readCompressedStream(rootObj, pointer, currentPdfFile, objectReader, linearObj);
    } else {
      return readLegacyReferenceTable(rootObj, pointer, eof, currentPdfFile);
    }
  }