/* See if a dictionary is a structure element.
     We identify it by the S and P elements, which are
     required, and by making sure that the Type element,
     if present, has a value of "StructElem".
  */
  private boolean isStructElem(PdfDictionary elem) throws PdfException {
    // Switch that would turn a non-standard structure type into an error.
    // It stays off: there seems to be no real requirement that structure
    // types belong to the standard set.
    final boolean enforceStandardTypes = false;
    try {
      // Type is optional, but when it is there it has to read "StructElem".
      PdfObject declaredType = elem.get("Type");
      if (declaredType != null
          && !"StructElem".equals(((PdfSimpleObject) declaredType).getStringValue())) {
        return false;
      }

      // S carries the structure type. For now the only demand is that it
      // is a name; membership in the list of known types is not enforced.
      PdfObject structType = _module.resolveIndirectObject(elem.get("S"));
      if (!(structType instanceof PdfSimpleObject)) {
        return false;
      }
      Token structToken = ((PdfSimpleObject) structType).getToken();
      if (!(structToken instanceof Name)) {
        return false;
      }

      // Let the tree dereference the name, then look it up among the
      // standard types. The lookup result only matters when enforcement
      // is switched on.
      String structTypeName = _tree.dereferenceStructType(((Name) structToken).getValue());
      boolean isStandardType = StdStructTypes.includes(structTypeName);
      if (enforceStandardTypes && !isStandardType) {
        throw new PdfInvalidException("Non-standard structure type name");
      }

      // P has to be an indirect reference. The documentation asks for a
      // structure element as the target, but the structure tree root
      // apparently has to be accepted too, so both are allowed.
      PdfObject parentRef = elem.get("P");
      if (!(parentRef instanceof PdfIndirectObj)) {
        return false;
      }

      // The target must at least be a dictionary; a failing cast or a
      // null target is handled by the catch below.
      PdfDictionary parent = (PdfDictionary) _module.resolveIndirectObject(parentRef);
      PdfSimpleObject parentType = (PdfSimpleObject) parent.get("Type");
      if (parentType == null) {
        // An untyped parent is accepted.
        return true;
      }
      String parentTypeName = parentType.getStringValue();
      return "StructTreeRoot".equals(parentTypeName) || "StructElem".equals(parentTypeName);
    } catch (Exception e) {
      // Any violated assumption (bad cast, null, failed resolution)
      // means this is not a structure element.
      return false;
    }
  }
 /* Determine if a dictionary is an object reference dictionary,
 as in table 9.12. */
 private boolean isObjectRef(PdfDictionary dict) {
   try {
     // The Type entry has to exist and read "OBJR".
     PdfSimpleObject declaredType = (PdfSimpleObject) dict.get("Type");
     if (declaredType == null || !"OBJR".equals(declaredType.getStringValue())) {
       return false;
     }
     // The Obj entry is required: whatever it resolves to must be
     // non-null. Note that this does not verify that the entry itself
     // is an indirect reference.
     return _module.resolveIndirectObject(dict.get("Obj")) != null;
   } catch (Exception e) {
     // A wrongly typed entry or a failed resolution means "no".
     return false;
   }
 }
 /* Determine if a dictionary is a marked content dictionary.
 See Table 9.11 in the PDF 1.4 book. */
 private boolean isMarkedContent(PdfDictionary dict) {
   try {
     // The Type entry has to exist and read "MCR".
     PdfSimpleObject declaredType = (PdfSimpleObject) dict.get("Type");
     if (declaredType == null || !"MCR".equals(declaredType.getStringValue())) {
       return false;
     }
     // The MCID entry is required and, once resolved, must be a simple
     // object; a failing cast is handled by the catch below.
     PdfSimpleObject contentId =
         (PdfSimpleObject) _module.resolveIndirectObject(dict.get("MCID"));
     return contentId != null;
   } catch (Exception e) {
     // A wrongly typed entry or a failed resolution means "no".
     return false;
   }
 }
  /* Check if an attribute dictionary is reasonable. */
  private void checkAttribute(PdfDictionary attr) throws PdfException {
    try {
      // The "O" entry is mandatory and its value must be a name. The two
      // casts are the validation: a missing entry or a token of another
      // kind raises an exception that the catch below converts.
      PdfSimpleObject owner = (PdfSimpleObject) attr.get("O");
      Name ownerName = (Name) owner.getToken();

      // A Placement entry holding anything other than "Inline" means
      // block level attributes are allowed.
      PdfSimpleObject placementEntry = (PdfSimpleObject) attr.get("Placement");
      boolean placedInline =
          placementEntry == null || "Inline".equals(placementEntry.getStringValue());
      if (!placedInline) {
        _structIsInline = false;
      }

      // The attributes themselves appear to be just the other keys of
      // the attribute dictionary. Record whether any of them cannot go
      // in a BLSE; whether we actually are in a BLSE is checked later.
      if (attrIsBlockLevel(attr)) {
        _attrIsBlock = true;
      }
    } catch (Exception e) {
      // Every failure above is reported as the same invalid-attribute error.
      throw new PdfInvalidException("Invalid attribute in document structure");
    }
  }
  // Example #5
  // 0
  /* Walk through the page tree and check all Resources dictionaries
    that we find.  Along the way, we check several things:

    Color spaces. Any Separation and DeviceN resources we
    find must have an AlternateSpace of DeviceGray or
    DeviceCMYK.

    Extended graphic states.

    XObjects.
  */
  private boolean resourcesOK() {
    PageTreeNode docTreeRoot = _module.getDocumentTree();
    try {
      docTreeRoot.startWalk();
      // nextDocNode() hands back null once the walk is finished.
      for (DocNode docNode = docTreeRoot.nextDocNode();
          docNode != null;
          docNode = docTreeRoot.nextDocNode()) {
        // Node-level resources.
        if (!extGStateAndXObjectsOK(docNode.getResources())) {
          return false;
        }
        // Only page objects carry content streams and annotations.
        if (docNode instanceof PageObject) {
          PageObject page = (PageObject) docNode;
          if (!contentStreamsOK(page.getContentStreams())) {
            return false;
          }
          if (!trapNetAnnotsOK(page.getAnnotations())) {
            return false;
          }
        }
      }
    } catch (Exception e) {
      // A wrongly typed object or a failed resolution anywhere in the
      // walk counts as a failed check.
      return false;
    }
    return true; // passed all tests
  }

  /* Check the ExtGState and XObject entries of one resource dictionary.
     A null dictionary has nothing to check and passes. */
  private boolean extGStateAndXObjectsOK(PdfDictionary resources) throws Exception {
    if (resources == null) {
      return true;
    }
    // Check extended graphics state.
    PdfDictionary gs =
        (PdfDictionary) _module.resolveIndirectObject(resources.get("ExtGState"));
    if (!extGStateOK(gs)) {
      return false;
    }
    // Check XObjects.
    PdfDictionary xo = (PdfDictionary) _module.resolveIndirectObject(resources.get("XObject"));
    return xObjectsOK(xo);
  }

  /* Check the content streams of a page: their resources, their filters,
     and that none of them is an external stream. A null list passes. */
  private boolean contentStreamsOK(List streams) throws Exception {
    if (streams == null) {
      return true;
    }
    for (Iterator iter = streams.iterator(); iter.hasNext(); ) {
      PdfDictionary dict = ((PdfStream) iter.next()).getDict();

      // The Resources entry may be an indirect reference, so resolve it
      // before casting, like every other dictionary lookup here.
      PdfDictionary rs = (PdfDictionary) _module.resolveIndirectObject(dict.get("Resources"));
      if (!extGStateAndXObjectsOK(rs)) {
        return false;
      }

      // Also check for filters, to make sure
      // there aren't any forbidden LZW filters.
      if (!filterOK(dict.get("Filter"), true, true)) {
        return false;
      }

      // External streams are also forbidden.
      if (dict.get("F") != null) {
        return false;
      }
    }
    return true;
  }

  /* Check the annotations of a page; in particular, a TrapNet annotation
     must not carry a non-empty FontFauxing array. A null array passes. */
  private boolean trapNetAnnotsOK(PdfArray annots) throws Exception {
    if (annots == null) {
      return true;
    }
    Vector annVec = annots.getContent();
    for (int i = 0; i < annVec.size(); i++) {
      PdfDictionary annDict =
          (PdfDictionary) _module.resolveIndirectObject((PdfObject) annVec.elementAt(i));
      // Entries may be indirect references; resolve before casting.
      PdfSimpleObject subtypeObj =
          (PdfSimpleObject) _module.resolveIndirectObject(annDict.get("Subtype"));
      if (subtypeObj == null) {
        // An annotation without a Subtype has always failed this check
        // (previously through a NullPointerException); keep that result.
        return false;
      }
      if ("TrapNet".equals(subtypeObj.getStringValue())) {
        // FontFauxing must be absent or 0-length
        PdfArray ff = (PdfArray) _module.resolveIndirectObject(annDict.get("FontFauxing"));
        if (ff != null && !ff.getContent().isEmpty()) {
          return false; // a faux pas
        }
      }
    }
    return true;
  }