コード例 #1
0
  /**
   * read 1.5 compression stream ref table
   *
   * @throws PdfException
   */
  private PdfObject readCompressedStream(
      PdfObject rootObj,
      int pointer,
      final PdfFileReader currentPdfFile,
      final ObjectReader objectReader,
      final PdfObject linearObj)
      throws PdfException {

    while (pointer != -1) {

      /** get values to read stream ref */
      movePointer(pointer);

      final byte[] raw = objectReader.readObjectData(-1, null);

      /** read the object name from the start */
      final StringBuilder objectName = new StringBuilder();
      char current1, last = ' ';
      int matched = 0, i1 = 0;
      while (i1 < raw.length) {
        current1 = (char) raw[i1];

        // treat returns same as spaces
        if (current1 == 10 || current1 == 13) {
          current1 = ' ';
        }

        if (current1 == ' ' && last == ' ') { // lose duplicate or spaces
          matched = 0;
        } else if (current1 == pattern.charAt(matched)) { // looking for obj at end
          matched++;
        } else {
          matched = 0;
          objectName.append(current1);
        }
        if (matched == 3) {
          break;
        }
        last = current1;
        i1++;
      }

      // add end and put into Map
      objectName.append('R');

      final PdfObject pdfObject = new CompressedObject(objectName.toString());
      pdfObject.setCompressedStream(true);
      final ObjectDecoder objectDecoder = new ObjectDecoder(currentPdfFile);
      objectDecoder.readDictionaryAsObject(pdfObject, 0, raw);

      // read the field sizes
      final int[] fieldSizes = pdfObject.getIntArray(PdfDictionary.W);

      // read the xrefs stream
      byte[] xrefs = pdfObject.getDecodedStream();

      // if encr
      if (xrefs == null) {
        xrefs = currentPdfFile.readStream(pdfObject, true, true, false, false, true, null);
      }

      final int[] Index = pdfObject.getIntArray(PdfDictionary.Index);
      if (Index == null) { // single set of values

        // System.out.println("-------------1.Offsets-------------"+current+" "+numbEntries);
        CompressedObjects.readCompressedOffsets(
            0, 0, pdfObject.getInt(PdfDictionary.Size), fieldSizes, xrefs, offset, pdf_datafile);

      } else { // pairs of values in Index[] array
        final int count = Index.length;
        int pntr = 0;

        for (int aa = 0; aa < count; aa += 2) {

          // System.out.println("-------------2.Offsets-------------"+Index[aa]+" "+Index[aa+1]);

          pntr =
              CompressedObjects.readCompressedOffsets(
                  pntr, Index[aa], Index[aa + 1], fieldSizes, xrefs, offset, pdf_datafile);
        }
      }

      /**
       * now process trailer values - only first set of table values for root, encryption and info
       */
      if (rootObj == null) {

        rootObj = pdfObject.getDictionary(PdfDictionary.Root);

        /** handle encryption */
        encryptObj = pdfObject.getDictionary(PdfDictionary.Encrypt);

        if (encryptObj != null) {

          final byte[][] IDs = pdfObject.getStringArray(PdfDictionary.ID);
          if (IDs != null && this.ID == null) {
            // only the first encountered ID should be used as a fileID for decryption
            this.ID = IDs[0];
          }
        }

        infoObject = pdfObject.getDictionary(PdfDictionary.Info);
      }

      // make sure first values used if several tables and code for prev so long as not linearized
      // may need adjusting as more examples turn up
      if (linearObj != null) {
        pointer = -1;
      } else {
        pointer = pdfObject.getInt(PdfDictionary.Prev);

        // a non-compressed object table can follow a compressed one so we need to allow for this
        if (!isCompressedStream(pointer, (int) eof)) {
          return readLegacyReferenceTable(rootObj, pointer, (int) eof, currentPdfFile);
        }
      }
    }

    return rootObj;
  }
コード例 #2
0
  /** read reference table from file so we can locate objects in pdf file and read the trailers */
  private PdfObject readLegacyReferenceTable(
      PdfObject rootObj, int pointer, final int eof, final PdfFileReader currentPdfFile)
      throws PdfException {

    int endTable, current = 0; // current object number
    byte[] Bytes;
    int bufSize = 1024;

    /** read and decode 1 or more trailers */
    while (true) {

      try {

        // allow for pointer outside file
        Bytes = Trailer.readTrailer(bufSize, pointer, eof, pdf_datafile);

      } catch (final Exception e) {

        try {
          closeFile();
        } catch (final IOException e1) {
          if (LogWriter.isOutput()) {
            LogWriter.writeLog("Exception " + e + " closing file " + e1);
          }
        }
        throw new PdfException("Exception " + e + " reading trailer");
      }

      if (Bytes == null) // safety catch
      {
        break;
      }

      /** get trailer */
      int i = 0;

      final int maxLen = Bytes.length;

      // for(int a=0;a<100;a++)
      //	System.out.println((char)Bytes[i+a]);
      while (i < maxLen) { // look for trailer keyword
        if (Bytes[i] == 116
            && Bytes[i + 1] == 114
            && Bytes[i + 2] == 97
            && Bytes[i + 3] == 105
            && Bytes[i + 4] == 108
            && Bytes[i + 5] == 101
            && Bytes[i + 6] == 114) {
          break;
        }

        i++;
      }

      // save endtable position for later
      endTable = i;

      if (i == Bytes.length) {
        break;
      }

      // move to beyond <<
      while (Bytes[i] != 60 && Bytes[i - 1] != 60) {
        i++;
      }

      i++;
      final PdfObject pdfObject = new CompressedObject("1 0 R");
      Dictionary.readDictionary(pdfObject, i, Bytes, -1, true, currentPdfFile, false);

      // move to beyond >>
      int level = 0;
      while (true) {

        if (Bytes[i] == 60 && Bytes[i - 1] == 60) {
          level++;
          i++;
        } else if (Bytes[i] == '[') {
          i++;
          while (Bytes[i] != ']') {
            i++;
            if (i == Bytes.length) {
              break;
            }
          }
        } else if (Bytes[i] == 62 && Bytes[i - 1] == 62) {
          level--;
          i++;
        }

        if (level == 0) {
          break;
        }

        i++;
      }

      // handle optional XRefStm
      final int XRefStm = pdfObject.getInt(PdfDictionary.XRefStm);

      if (XRefStm != -1) {
        pointer = XRefStm;
      } else { // usual way

        boolean hasRef = true;

        /** handle spaces and comments */
        while (Bytes[i] == 10 || Bytes[i] == 13) {
          i++;
        }

        while (Bytes[i] == '%') {
          while (Bytes[i] != 10) {

            i++;
          }
          i++;
        }
        /* fix for /Users/markee/Downloads/oneiderapartnerbrochure_web_1371798737.pdf
        /**/

        // look for xref as end of startref
        while (Bytes[i] != 116
            && Bytes[i + 1] != 120
            && Bytes[i + 2] != 114
            && Bytes[i + 3] != 101
            && Bytes[i + 4] != 102) {

          if (Bytes[i] == 'o' && Bytes[i + 1] == 'b' && Bytes[i + 2] == 'j') {
            hasRef = false;
            break;
          }
          i++;
        }

        if (hasRef) {

          i += 8;
          // move to start of value ignoring spaces or returns
          while ((i < maxLen) && (Bytes[i] == 10 || Bytes[i] == 32 || Bytes[i] == 13)) {
            i++;
          }

          final int s = i;

          // allow for characters between xref and startref
          while (i < maxLen && Bytes[i] != 10 && Bytes[i] != 32 && Bytes[i] != 13) {
            i++;
          }

          /** convert xref to string to get pointer */
          if (s != i) {
            pointer = NumberUtils.parseInt(s, i, Bytes);
          }
        }
      }

      i = 0;

      // allow for bum data at start
      while (Bytes[i] == 13 || Bytes[i] == 10 || Bytes[i] == 9) {
        i++;
      }

      if (pointer == -1) {
        if (LogWriter.isOutput()) {
          LogWriter.writeLog("No startRef");
        }

        /** now read the objects for the trailers */
      } else if (Bytes[i] == 120
          && Bytes[i + 1] == 114
          && Bytes[i + 2] == 101
          && Bytes[i + 3] == 102) { // make sure starts xref

        i = 5;

        // move to start of value ignoring spaces or returns
        while (Bytes[i] == 10 || Bytes[i] == 32 || Bytes[i] == 13) {
          i++;
        }

        current = offset.readXRefs(current, Bytes, endTable, i, eof, pdf_datafile);

        /**
         * now process trailer values - only first set of table values for root, encryption and info
         */
        if (rootObj == null) {

          rootObj = pdfObject.getDictionary(PdfDictionary.Root);

          encryptObj = pdfObject.getDictionary(PdfDictionary.Encrypt);
          if (encryptObj != null) {

            final byte[][] IDs = pdfObject.getStringArray(PdfDictionary.ID);
            if (IDs != null && this.ID == null) {
              // only the first encountered ID should be used as a fileID for decryption
              this.ID = IDs[0];
            }
          }

          infoObject = pdfObject.getDictionary(PdfDictionary.Info);
        }

        // make sure first values used if several tables and code for prev
        pointer = pdfObject.getInt(PdfDictionary.Prev);

        // see if other trailers
        if (pointer != -1 && pointer < this.eof) {
          // reset values for loop
          bufSize = 1024;

          // track ref table so we can work out object length
          offset.addXref(pointer);

        } else // reset if fails second test above
        {
          pointer = -1;
        }

      } else {
        pointer = -1;

        // needs to be read to pick up potential /Pages value
        //noinspection ObjectAllocationInLoop
        rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset));
        currentPdfFile.readObject(rootObj);

        offset.setRefTableInvalid(true);
      }
      if (pointer == -1) {
        break;
      }
    }

    if (encryptObj == null
        && rootObj != null) { // manual check for broken file (ignore if Encrypted)

      int type = -1;

      int status = rootObj.getStatus();
      byte[] data = rootObj.getUnresolvedData();

      try {

        final ObjectDecoder objectDecoder = new ObjectDecoder(currentPdfFile);
        objectDecoder.checkResolved(rootObj);

        type = rootObj.getParameterConstant(PdfDictionary.Type);

      } catch (Exception e) { // we need to ignore so just catch, put back as was and log

        rootObj.setStatus(status);
        rootObj.setUnresolvedData(data, status);
        if (LogWriter.isOutput()) {
          LogWriter.writeLog("[PDF] Exception reading type on root object " + e);
        }
      }

      // something gone wrong so manually index
      if (type == PdfDictionary.Font) { // see 21153 - ref table in wrong order
        rootObj = null; // /will reset in code at end
      }
    }

    // something gone wrong so manually index
    if (rootObj == null) { // see 21382

      offset.clear();
      offset.reuse();

      // needs to be read to pick up potential /Pages value
      //noinspection ObjectAllocationInLoop
      rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset));
      currentPdfFile.readObject(rootObj);

      offset.setRefTableInvalid(true);
    }

    return rootObj;
  }