/* See if a dictionary is a structure element. We identify it by the S and P elements, which are required, and by making sure that the Type element, if present, has a value of "StructElem". */ private boolean isStructElem(PdfDictionary elem) throws PdfException { try { PdfObject typ = elem.get("Type"); if (typ != null) { if (!"StructElem".equals(((PdfSimpleObject) typ).getStringValue())) { return false; } } PdfObject s = _module.resolveIndirectObject(elem.get("S")); // The structure type is supposed to be one of // a list of known structure types, or else is // mapped to one through the role map dictionary. // For the moment, just make sure it's a name. if (!(s instanceof PdfSimpleObject)) { return false; } Token tok = ((PdfSimpleObject) s).getToken(); if (!(tok instanceof Name)) { return false; } // It appears that there really isn't a requirement // to have structure types belong to the standard types. // Conditionalize this code out, pending more info. boolean checkStandardTypes = false; String st = ((Name) tok).getValue(); st = _tree.dereferenceStructType(st); if (!StdStructTypes.includes(st)) { if (checkStandardTypes) { throw new PdfInvalidException("Non-standard structure type name"); } } else { // The structure type is a standard one. } // The parent reference must be an indirect reference. // The documentation says it must refer to another // structure element dictionary, but it seems that it // must also be able to refer to the structure tree root. // I'll allow both. PdfObject pref = elem.get("P"); if (!(pref instanceof PdfIndirectObj)) { return false; } // Make sure it refers to a dictionary (at least). PdfDictionary p = (PdfDictionary) _module.resolveIndirectObject(pref); PdfSimpleObject ptype = (PdfSimpleObject) p.get("Type"); if (ptype != null) { String typename = ptype.getStringValue(); if (!"StructTreeRoot".equals(typename) && !"StructElem".equals(typename)) { return false; } } // Passed all tests. return true; } catch (Exception e) { // Some assumption was violated return false; } }
/* Determine if a dictionary is an object reference dictionary, as in table 9.12. */ private boolean isObjectRef(PdfDictionary dict) { try { PdfSimpleObject typeObj = (PdfSimpleObject) dict.get("Type"); if (!typeObj.getStringValue().equals("OBJR")) { return false; } // An Obj entry is required. Must be an indirect object. PdfObject obj = _module.resolveIndirectObject(dict.get("Obj")); if (obj == null) { return false; } return true; } catch (Exception e) { return false; } }
/* Determine if a dictionary is a marked content dictionary. See Table 9.11 in the PDF 1.4 book. */ private boolean isMarkedContent(PdfDictionary dict) { try { PdfSimpleObject typeObj = (PdfSimpleObject) dict.get("Type"); if (!typeObj.getStringValue().equals("MCR")) { return false; } // An MCID entry is required. PdfSimpleObject mcidObj = (PdfSimpleObject) _module.resolveIndirectObject(dict.get("MCID")); if (mcidObj == null) { return false; } return true; } catch (Exception e) { return false; } }
/* Check if an attribute dictionary is reasonable. */ private void checkAttribute(PdfDictionary attr) throws PdfException { try { // Must have an entry named "O", whose value is a name. PdfSimpleObject plugin = (PdfSimpleObject) attr.get("O"); Name tok = (Name) plugin.getToken(); // If it has a Placement entry with a value other than // "Inline", then we allow block level attributes. PdfSimpleObject placement = (PdfSimpleObject) attr.get("Placement"); if (placement != null && !"Inline".equals(placement.getStringValue())) { _structIsInline = false; } // Though I don't think the Adobe PDF bible actually // says so, it appears that the "attributes" are // simply other keys in the attribute dictionary. // Remember if we see attributes that can't go in BLSE's; // we'll check later if we're actually in a BLSE. if (attrIsBlockLevel(attr)) { _attrIsBlock = true; } } catch (Exception e) { throw new PdfInvalidException("Invalid attribute in document structure"); } }
/* Walk through the page tree and check all Resources dictionaries that we find. Along the way, we check several things: Color spaces. Any Separation and DeviceN resources we find must have an AlternateSpace of DeviceGray or DeviceCMYK. Extended graphic states. XObjects. */ private boolean resourcesOK() { PageTreeNode docTreeRoot = _module.getDocumentTree(); try { docTreeRoot.startWalk(); DocNode docNode; for (; ; ) { docNode = docTreeRoot.nextDocNode(); if (docNode == null) { break; } // Check for node-level resources PdfDictionary rsrc = docNode.getResources(); if (rsrc != null) { // Check extended graphics state. PdfDictionary gs = (PdfDictionary) _module.resolveIndirectObject(rsrc.get("ExtGState")); if (!extGStateOK(gs)) { return false; } // Check XObjects. PdfDictionary xo = (PdfDictionary) _module.resolveIndirectObject(rsrc.get("XObject")); if (!xObjectsOK(xo)) { return false; } } // Check content streams for resources if (docNode instanceof PageObject) { List streams = ((PageObject) docNode).getContentStreams(); if (streams != null) { Iterator iter = streams.listIterator(); while (iter.hasNext()) { PdfStream stream = (PdfStream) iter.next(); PdfDictionary dict = stream.getDict(); PdfDictionary rs = (PdfDictionary) dict.get("Resources"); if (rs != null) { PdfDictionary gs = (PdfDictionary) _module.resolveIndirectObject(rs.get("ExtGState")); if (!extGStateOK(gs)) { return false; } PdfDictionary xo = (PdfDictionary) _module.resolveIndirectObject(rs.get("XObject")); if (!xObjectsOK(xo)) { return false; } } // Also check for filters, to make sure // there aren't any forbidden LZW filters. PdfObject filters = dict.get("Filter"); if (!filterOK(filters, true, true)) { return false; } // External streams are also forbidden. if (dict.get("F") != null) { return false; } } } // Also check page objects for annotations -- // in particular, TrapNet annotations. PdfArray annots = ((PageObject) docNode).getAnnotations(); if (annots != null) { Vector annVec = annots.getContent(); for (int i = 0; i < annVec.size(); i++) { PdfDictionary annDict = (PdfDictionary) _module.resolveIndirectObject((PdfObject) annVec.elementAt(i)); PdfSimpleObject subtypeObj = (PdfSimpleObject) annDict.get("Subtype"); if ("TrapNet".equals(subtypeObj.getStringValue())) { // FontFauxing must be absent or 0-length PdfArray ff = (PdfArray) annDict.get("FontFauxing"); if (ff != null) { Vector ffVec = ff.getContent(); if (ffVec.size() > 0) { return false; // a faux pas } } } } } } } } catch (Exception e) { return false; } return true; // passed all tests }