public static int setNameTreeValue( final PdfObject pdfObject, int i, final byte[] raw, final int length, final boolean ignoreRecursion, final int PDFkeyInt, final PdfFileReader objectReader) { boolean isRef = false; // move to start while (raw[i] != '[') { // can be number as well if (raw[i] == '(') { // allow for W (7) isRef = false; break; } // allow for number as in refer 9 0 R if (raw[i] >= '0' && raw[i] <= '9') { isRef = true; break; } i++; } // allow for direct or indirect byte[] data = raw; int start = i, j = i; int count = 0; // read ref data and slot in if (isRef) { // number final int[] values = StreamReaderUtils.readRefFromStream(raw, i); i = values[2]; final int generation = values[1]; final int number = values[0]; if (raw[i] != 82) // we are expecting R to end ref { throw new RuntimeException( "3. Unexpected value in file " + raw[i] + " - please send to IDRsolutions for analysis"); } if (!ignoreRecursion) { // read the Dictionary data data = objectReader.readObjectAsByteArray( pdfObject, objectReader.isCompressed(number, generation), number, generation); // allow for data in Linear object not yet loaded if (data == null) { pdfObject.setFullyResolved(false); if (debugFastCode) { System.out.println(padding + "Data not yet loaded"); } i = length; return i; } // lose obj at start j = 3; while (data[j - 1] != 106 && data[j - 2] != 98 && data[j - 3] != 111 && data[j - 3] != '<') { j++; } j = StreamReaderUtils.skipSpaces(data, j); // reset pointer start = j; } } // move to end while (j < data.length) { if (data[j] == '[' || data[j] == '(') { count++; } else if (data[j] == ']' || data[j] == ')') { count--; } if (count == 0) { break; } j++; } if (!ignoreRecursion) { final int stringLength = j - start + 1; byte[] newString = new byte[stringLength]; System.arraycopy(data, start, newString, 0, stringLength); if (pdfObject.getObjectType() != PdfDictionary.Encrypt) { final DecryptionFactory decryption = objectReader.getDecryptionObject(); if (decryption != null) { try { newString = decryption.decrypt( newString, pdfObject.getObjectRefAsString(), false, null, false, false); } catch (final PdfSecurityException e) { LogWriter.writeLog("Exception: " + e.getMessage()); } } } pdfObject.setTextStreamValue(PDFkeyInt, newString); if (debugFastCode) { System.out.println(padding + "name=" + new String(newString) + " set in " + pdfObject); } } // roll on if (!isRef) { i = j; } return i; }
public static int setNameStringValue( final PdfObject pdfObject, int i, final byte[] raw, final boolean isMap, final Object PDFkey, final int PDFkeyInt, final PdfFileReader objectReader) { byte[] stringBytes; // move cursor to end of last command if needed while (raw[i] != 10 && raw[i] != 13 && raw[i] != 32 && raw[i] != 47 && raw[i] != '(' && raw[i] != '<') { i++; } i = StreamReaderUtils.skipSpaces(raw, i); // work out if direct (ie /String or read ref 27 0 R int j2 = i; byte[] arrayData = raw; boolean isIndirect = raw[i] != 47 && raw[i] != 40 && raw[i] != 60; // Some /NAME values start ( final boolean startsWithBrace = raw[i] == 40; // delete // @speed - lose this code once Filters done properly /* * just check its not /Filter [/FlateDecode ] or [] or [ /ASCII85Decode /FlateDecode ] * by checking next valid char not / */ boolean isInsideArray = false; if (isIndirect) { int aa = i + 1; aa = StreamReaderUtils.skipSpaces(raw, aa); if (raw[aa] == 47 || raw[aa] == ']') { isIndirect = false; i = aa + 1; isInsideArray = true; } } if (isIndirect) { // its in another object so we need to fetch final int[] values = StreamReaderUtils.readRefFromStream(raw, i); final int ref = values[0]; final int generation = values[1]; i = values[2]; if (raw[i] != 82) { // we are expecting R to end ref throw new RuntimeException( padding + "2. Unexpected value in file - please send to IDRsolutions for analysis"); } // read the Dictionary data arrayData = objectReader.readObjectAsByteArray( pdfObject, objectReader.isCompressed(ref, generation), ref, generation); // allow for data in Linear object not yet loaded if (arrayData == null) { pdfObject.setFullyResolved(false); if (debugFastCode) { System.out.println(padding + "Data not yet loaded"); } return raw.length; } // lose obj at start and roll onto / if (arrayData[0] == 47) { j2 = 0; } else { j2 = 3; while (arrayData[j2] != 47) { j2++; } } } // lose / j2++; // allow for no value with /Intent//Filter if (arrayData[j2] == 47) { return j2 - 1; } int end = j2 + 1; if (isInsideArray) { // values inside [] // move cursor to start of text j2 = StreamReaderUtils.skipSpacesOrOtherCharacter(arrayData, j2, 47); int slashes = 0; // count chars byte lastChar = 0; while (true) { if (arrayData[end] == ']') { break; } if (arrayData[end] == 47 && (lastChar == 32 || lastChar == 10 || lastChar == 13)) // count the / if gap before { slashes++; } lastChar = arrayData[end]; end++; if (end == arrayData.length) { break; } } // set value and ensure space gap final int charCount = end - slashes; int ptr = 0; stringBytes = new byte[charCount - j2]; byte nextChar, previous = 0; for (int ii = j2; ii < charCount; ii++) { nextChar = arrayData[ii]; if (nextChar == 47) { if (previous != 32 && previous != 10 && previous != 13) { stringBytes[ptr] = 32; ptr++; } } else { stringBytes[ptr] = nextChar; ptr++; } previous = nextChar; } } else { // its in data stream directly or (string) // count chars while (true) { if (startsWithBrace) { if (arrayData[end] == ')' && !ObjectUtils.isEscaped(arrayData, end)) { break; } } else if (arrayData[end] == 32 || arrayData[end] == 10 || arrayData[end] == 13 || arrayData[end] == 47 || arrayData[end] == 62) { break; } end++; if (end == arrayData.length) { break; } } // set value final int charCount = end - j2; stringBytes = new byte[charCount]; System.arraycopy(arrayData, j2, stringBytes, 0, charCount); } if (isMap) { pdfObject.setName(PDFkey, StringUtils.getTextString(stringBytes, false)); } else { pdfObject.setName(PDFkeyInt, stringBytes); } if (debugFastCode) { System.out.println( padding + "String set as =" + new String(stringBytes) + "< written to " + pdfObject); } // put cursor in correct place (already there if ref) if (!isIndirect) { i = end - 1; } return i; }
/** read reference table from file so we can locate objects in pdf file and read the trailers */ private PdfObject readLegacyReferenceTable( PdfObject rootObj, int pointer, final int eof, final PdfFileReader currentPdfFile) throws PdfException { int endTable, current = 0; // current object number byte[] Bytes; int bufSize = 1024; /** read and decode 1 or more trailers */ while (true) { try { // allow for pointer outside file Bytes = Trailer.readTrailer(bufSize, pointer, eof, pdf_datafile); } catch (final Exception e) { try { closeFile(); } catch (final IOException e1) { if (LogWriter.isOutput()) { LogWriter.writeLog("Exception " + e + " closing file " + e1); } } throw new PdfException("Exception " + e + " reading trailer"); } if (Bytes == null) // safety catch { break; } /** get trailer */ int i = 0; final int maxLen = Bytes.length; // for(int a=0;a<100;a++) // System.out.println((char)Bytes[i+a]); while (i < maxLen) { // look for trailer keyword if (Bytes[i] == 116 && Bytes[i + 1] == 114 && Bytes[i + 2] == 97 && Bytes[i + 3] == 105 && Bytes[i + 4] == 108 && Bytes[i + 5] == 101 && Bytes[i + 6] == 114) { break; } i++; } // save endtable position for later endTable = i; if (i == Bytes.length) { break; } // move to beyond << while (Bytes[i] != 60 && Bytes[i - 1] != 60) { i++; } i++; final PdfObject pdfObject = new CompressedObject("1 0 R"); Dictionary.readDictionary(pdfObject, i, Bytes, -1, true, currentPdfFile, false); // move to beyond >> int level = 0; while (true) { if (Bytes[i] == 60 && Bytes[i - 1] == 60) { level++; i++; } else if (Bytes[i] == '[') { i++; while (Bytes[i] != ']') { i++; if (i == Bytes.length) { break; } } } else if (Bytes[i] == 62 && Bytes[i - 1] == 62) { level--; i++; } if (level == 0) { break; } i++; } // handle optional XRefStm final int XRefStm = pdfObject.getInt(PdfDictionary.XRefStm); if (XRefStm != -1) { pointer = XRefStm; } else { // usual way boolean hasRef = true; /** handle spaces and comments */ while (Bytes[i] == 10 || Bytes[i] == 13) { i++; } while (Bytes[i] == '%') { while (Bytes[i] != 10) { i++; } i++; } /* fix for /Users/markee/Downloads/oneiderapartnerbrochure_web_1371798737.pdf /**/ // look for xref as end of startref while (Bytes[i] != 116 && Bytes[i + 1] != 120 && Bytes[i + 2] != 114 && Bytes[i + 3] != 101 && Bytes[i + 4] != 102) { if (Bytes[i] == 'o' && Bytes[i + 1] == 'b' && Bytes[i + 2] == 'j') { hasRef = false; break; } i++; } if (hasRef) { i += 8; // move to start of value ignoring spaces or returns while ((i < maxLen) && (Bytes[i] == 10 || Bytes[i] == 32 || Bytes[i] == 13)) { i++; } final int s = i; // allow for characters between xref and startref while (i < maxLen && Bytes[i] != 10 && Bytes[i] != 32 && Bytes[i] != 13) { i++; } /** convert xref to string to get pointer */ if (s != i) { pointer = NumberUtils.parseInt(s, i, Bytes); } } } i = 0; // allow for bum data at start while (Bytes[i] == 13 || Bytes[i] == 10 || Bytes[i] == 9) { i++; } if (pointer == -1) { if (LogWriter.isOutput()) { LogWriter.writeLog("No startRef"); } /** now read the objects for the trailers */ } else if (Bytes[i] == 120 && Bytes[i + 1] == 114 && Bytes[i + 2] == 101 && Bytes[i + 3] == 102) { // make sure starts xref i = 5; // move to start of value ignoring spaces or returns while (Bytes[i] == 10 || Bytes[i] == 32 || Bytes[i] == 13) { i++; } current = offset.readXRefs(current, Bytes, endTable, i, eof, pdf_datafile); /** * now process trailer values - only first set of table values for root, encryption and info */ if (rootObj == null) { rootObj = pdfObject.getDictionary(PdfDictionary.Root); encryptObj = pdfObject.getDictionary(PdfDictionary.Encrypt); if (encryptObj != null) { final byte[][] IDs = pdfObject.getStringArray(PdfDictionary.ID); if (IDs != null && this.ID == null) { // only the first encountered ID should be used as a fileID for decryption this.ID = IDs[0]; } } infoObject = pdfObject.getDictionary(PdfDictionary.Info); } // make sure first values used if several tables and code for prev pointer = pdfObject.getInt(PdfDictionary.Prev); // see if other trailers if (pointer != -1 && pointer < this.eof) { // reset values for loop bufSize = 1024; // track ref table so we can work out object length offset.addXref(pointer); } else // reset if fails second test above { pointer = -1; } } else { pointer = -1; // needs to be read to pick up potential /Pages value //noinspection ObjectAllocationInLoop rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset)); currentPdfFile.readObject(rootObj); offset.setRefTableInvalid(true); } if (pointer == -1) { break; } } if (encryptObj == null && rootObj != null) { // manual check for broken file (ignore if Encrypted) int type = -1; int status = rootObj.getStatus(); byte[] data = rootObj.getUnresolvedData(); try { final ObjectDecoder objectDecoder = new ObjectDecoder(currentPdfFile); objectDecoder.checkResolved(rootObj); type = rootObj.getParameterConstant(PdfDictionary.Type); } catch (Exception e) { // we need to ignore so just catch, put back as was and log rootObj.setStatus(status); rootObj.setUnresolvedData(data, status); if (LogWriter.isOutput()) { LogWriter.writeLog("[PDF] Exception reading type on root object " + e); } } // something gone wrong so manually index if (type == PdfDictionary.Font) { // see 21153 - ref table in wrong order rootObj = null; // /will reset in code at end } } // something gone wrong so manually index if (rootObj == null) { // see 21382 offset.clear(); offset.reuse(); // needs to be read to pick up potential /Pages value //noinspection ObjectAllocationInLoop rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset)); currentPdfFile.readObject(rootObj); offset.setRefTableInvalid(true); } return rootObj; }
/** * read 1.5 compression stream ref table * * @throws PdfException */ private PdfObject readCompressedStream( PdfObject rootObj, int pointer, final PdfFileReader currentPdfFile, final ObjectReader objectReader, final PdfObject linearObj) throws PdfException { while (pointer != -1) { /** get values to read stream ref */ movePointer(pointer); final byte[] raw = objectReader.readObjectData(-1, null); /** read the object name from the start */ final StringBuilder objectName = new StringBuilder(); char current1, last = ' '; int matched = 0, i1 = 0; while (i1 < raw.length) { current1 = (char) raw[i1]; // treat returns same as spaces if (current1 == 10 || current1 == 13) { current1 = ' '; } if (current1 == ' ' && last == ' ') { // lose duplicate or spaces matched = 0; } else if (current1 == pattern.charAt(matched)) { // looking for obj at end matched++; } else { matched = 0; objectName.append(current1); } if (matched == 3) { break; } last = current1; i1++; } // add end and put into Map objectName.append('R'); final PdfObject pdfObject = new CompressedObject(objectName.toString()); pdfObject.setCompressedStream(true); final ObjectDecoder objectDecoder = new ObjectDecoder(currentPdfFile); objectDecoder.readDictionaryAsObject(pdfObject, 0, raw); // read the field sizes final int[] fieldSizes = pdfObject.getIntArray(PdfDictionary.W); // read the xrefs stream byte[] xrefs = pdfObject.getDecodedStream(); // if encr if (xrefs == null) { xrefs = currentPdfFile.readStream(pdfObject, true, true, false, false, true, null); } final int[] Index = pdfObject.getIntArray(PdfDictionary.Index); if (Index == null) { // single set of values // System.out.println("-------------1.Offsets-------------"+current+" "+numbEntries); CompressedObjects.readCompressedOffsets( 0, 0, pdfObject.getInt(PdfDictionary.Size), fieldSizes, xrefs, offset, pdf_datafile); } else { // pairs of values in Index[] array final int count = Index.length; int pntr = 0; for (int aa = 0; aa < count; aa += 2) { // System.out.println("-------------2.Offsets-------------"+Index[aa]+" "+Index[aa+1]); pntr = CompressedObjects.readCompressedOffsets( pntr, Index[aa], Index[aa + 1], fieldSizes, xrefs, offset, pdf_datafile); } } /** * now process trailer values - only first set of table values for root, encryption and info */ if (rootObj == null) { rootObj = pdfObject.getDictionary(PdfDictionary.Root); /** handle encryption */ encryptObj = pdfObject.getDictionary(PdfDictionary.Encrypt); if (encryptObj != null) { final byte[][] IDs = pdfObject.getStringArray(PdfDictionary.ID); if (IDs != null && this.ID == null) { // only the first encountered ID should be used as a fileID for decryption this.ID = IDs[0]; } } infoObject = pdfObject.getDictionary(PdfDictionary.Info); } // make sure first values used if several tables and code for prev so long as not linearized // may need adjusting as more examples turn up if (linearObj != null) { pointer = -1; } else { pointer = pdfObject.getInt(PdfDictionary.Prev); // a non-compressed object table can follow a compressed one so we need to allow for this if (!isCompressedStream(pointer, (int) eof)) { return readLegacyReferenceTable(rootObj, pointer, (int) eof, currentPdfFile); } } } return rootObj; }
/** * read reference table start to see if new 1.5 type or traditional xref * * @throws PdfException */ public final PdfObject readReferenceTable( final PdfObject linearObj, final PdfFileReader currentPdfFile, final ObjectReader objectReader) throws PdfException { int pointer = -1; final int eof = (int) this.eof; boolean islinearizedCompressed = false; if (linearObj == null) { pointer = readFirstStartRef(); } else { // find at start of Linearized final byte[] data = pdf_datafile.getPdfBuffer(); final int count = data.length; int ptr = 5; for (int i = 0; i < count; i++) { // track start of this object (needed for compressed) if (data[i] == 'e' && data[i + 1] == 'n' && data[i + 2] == 'd' && data[i + 3] == 'o' && data[i + 4] == 'b' && data[i + 5] == 'j') { ptr = i + 6; } if (data[i] == 'x' && data[i + 1] == 'r' && data[i + 2] == 'e' && data[i + 3] == 'f') { pointer = i; i = count; } else if (data[i] == 'X' && data[i + 1] == 'R' && data[i + 2] == 'e' && data[i + 3] == 'f') { islinearizedCompressed = true; pointer = ptr; while (data[pointer] == 10 || data[pointer] == 13 || data[pointer] == 32) { pointer++; } i = count; } } } offset.addXref(pointer); PdfObject rootObj = null; if (pointer >= eof || pointer == 0) { if (LogWriter.isOutput()) { LogWriter.writeLog("Pointer not if file - trying to manually find startref"); } offset.setRefTableInvalid(true); try { rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset)); } catch (Error err) { throw new PdfException(err.getMessage() + " attempting to manually scan file for objects"); } currentPdfFile.readObject(rootObj); return rootObj; } else if (islinearizedCompressed || isCompressedStream(pointer, eof)) { return readCompressedStream(rootObj, pointer, currentPdfFile, objectReader, linearObj); } else { return readLegacyReferenceTable(rootObj, pointer, eof, currentPdfFile); } }