/* Decide whether a dictionary looks like a structure element.
     The S and P entries are what identify it.  A Type entry is
     optional, but when present its value must be "StructElem".
     Any exception raised while probing the dictionary (a failed
     cast, a missing parent, a resolution error) is treated as
     "not a structure element" and yields false.
  */
  private boolean isStructElem(PdfDictionary elem) throws PdfException {
    try {
      // An optional Type entry must say "StructElem".
      PdfObject typeEntry = elem.get("Type");
      if (typeEntry != null
          && !"StructElem".equals(((PdfSimpleObject) typeEntry).getStringValue())) {
        return false;
      }

      // The structure type (S) is supposed to be a known structure
      // type, or be mapped to one through the role map dictionary.
      // For now we only insist that it is a name.
      PdfObject structTypeObj = _module.resolveIndirectObject(elem.get("S"));
      if (!(structTypeObj instanceof PdfSimpleObject)) {
        return false;
      }
      Token structTypeTok = ((PdfSimpleObject) structTypeObj).getToken();
      if (!(structTypeTok instanceof Name)) {
        return false;
      }

      // There does not appear to be a real requirement that structure
      // types belong to the standard set, so the rejection below is
      // switched off pending more information.  The lookups are still
      // performed so that a failure in them is handled as before.
      boolean checkStandardTypes = false;
      String structTypeName = _tree.dereferenceStructType(((Name) structTypeTok).getValue());
      boolean isStandardType = StdStructTypes.includes(structTypeName);
      if (!isStandardType && checkStandardTypes) {
        throw new PdfInvalidException("Non-standard structure type name");
      }

      // The parent (P) must be an indirect reference.  The
      // documentation says it refers to another structure element,
      // but in practice it may also point at the structure tree
      // root, so both are accepted.
      PdfObject parentRef = elem.get("P");
      if (!(parentRef instanceof PdfIndirectObj)) {
        return false;
      }
      // The parent must at least be a dictionary.
      PdfDictionary parent = (PdfDictionary) _module.resolveIndirectObject(parentRef);
      PdfSimpleObject parentType = (PdfSimpleObject) parent.get("Type");
      if (parentType == null) {
        // An untyped parent dictionary is acceptable.
        return true;
      }
      String parentTypeName = parentType.getStringValue();
      return "StructTreeRoot".equals(parentTypeName) || "StructElem".equals(parentTypeName);
    } catch (Exception e) {
      // Some assumption about the dictionary's shape was violated.
      return false;
    }
  }
// Example #2
// 0
  /**
   * Returns <code>true</code> if the document satisfies the profile.
   *
   * <p>The checks run in a fixed order and stop at the first failure. Any exception raised along
   * the way is treated as nonconformance.
   */
  public boolean satisfiesThisProfile() {
    try {
      // The document catalog dictionary must carry an OutputIntents array.
      PdfDictionary catalog = _module.getCatalogDict();
      PdfArray outputIntents =
          (PdfArray) _module.resolveIndirectObject(catalog.get("OutputIntents"));
      if (outputIntents == null) {
        return false;
      }

      return infoDictOK("PDF/X-3") // requirements on the doc info dictionary
          && outputIntentsOK(outputIntents) // the OutputIntents themselves
          && resourcesOK() // resources and other stuff
          && trailerDictOK() // the trailer dictionary
          && _module.getEncryptionDict() == null // no encryption dictionary allowed
          && bboxOK(false) // bounding boxes; MediaBox is not required
          && !_module.getActionsExist(); // Actions make the document non-conformant
    } catch (Exception e) {
      // Any otherwise uncaught exception means nonconformance.
      return false;
    }
  }
 /* Tell whether a dictionary is an object reference dictionary,
 as in table 9.12.  The Type entry must read "OBJR" and the Obj
 entry must resolve to a non-null object.  A missing or
 non-simple Type entry surfaces as an exception and is reported
 as false. */
 private boolean isObjectRef(PdfDictionary dict) {
   try {
     PdfSimpleObject typeEntry = (PdfSimpleObject) dict.get("Type");
     if (typeEntry.getStringValue().equals("OBJR")) {
       // The Obj entry is required; it has to resolve to something.
       PdfObject target = _module.resolveIndirectObject(dict.get("Obj"));
       return target != null;
     }
     return false;
   } catch (Exception e) {
     return false;
   }
 }
 /* Tell whether a dictionary is a marked content dictionary.
 See Table 9.11 in the PDF 1.4 book.  The Type entry must read
 "MCR" and the MCID entry must resolve to a non-null simple
 object.  Any exception while probing is reported as false. */
 private boolean isMarkedContent(PdfDictionary dict) {
   try {
     PdfSimpleObject typeEntry = (PdfSimpleObject) dict.get("Type");
     if (typeEntry.getStringValue().equals("MCR")) {
       // The MCID entry is required.
       PdfSimpleObject mcid =
           (PdfSimpleObject) _module.resolveIndirectObject(dict.get("MCID"));
       return mcid != null;
     }
     return false;
   } catch (Exception e) {
     return false;
   }
 }
 /* Report whether an attribute dictionary contains any key
 listed in blockLevelAttrs, i.e. an attribute that is permitted
 only at block level.  The scan stops at the first hit. */
 private boolean attrIsBlockLevel(PdfDictionary attrDict) {
   boolean found = false;
   for (int idx = 0; !found && idx < blockLevelAttrs.length; idx++) {
     found = attrDict.get(blockLevelAttrs[idx]) != null;
   }
   return found;
 }
  /**
   * Builds this element's subtree, if any. The "K" entry of the dictionary is examined and every
   * structure element it references is wrapped in a StructureElement, which has its own subtree
   * built and its attributes checked. The resulting elements are collected into <code>children
   * </code>, which is left <code>null</code> when there are none.
   *
   * @throws PdfException if the "K" entry cannot be resolved, if a lone dictionary is none of the
   *     recognized kinds, or if building or checking a child element fails
   */
  public void buildSubtree() throws PdfException {
    PdfObject kids;
    try {
      kids = _module.resolveIndirectObject(_dict.get("K"));
    } catch (IOException e) {
      throw new PdfInvalidException("Invalid data in document structure tree");
    }
    children = null;

    // The "K" element is complicated, having five variants.
    if (kids instanceof PdfSimpleObject) {
      // A marked-content identifier. We don't explore further.
      return;
    }

    if (kids instanceof PdfDictionary) {
      // Could be any of three kinds of dictionaries:
      // - A marked-content reference dictionary
      // - A PDF object reference dictionary
      // - A structure element reference dictionary
      // The only one we check seriously is a structure element.
      PdfDictionary kidDict = (PdfDictionary) kids;
      if (!isStructElem(kidDict)) {
        if (!isMarkedContent(kidDict) && !isObjectRef(kidDict)) {
          throw new PdfInvalidException("Unknown element in structure tree");
        }
        return;
      }
      StructureElement child = new StructureElement(kidDict, _tree);
      child.buildSubtree();
      child.checkAttributes();
      children = new ArrayList(1);
      children.add(child);
      return;
    }

    if (!(kids instanceof PdfArray)) {
      // Absent or of some other kind: nothing to build.
      return;
    }

    Vector kidVec = ((PdfArray) kids).getContent();
    children = new LinkedList();
    for (int idx = 0; idx < kidVec.size(); idx++) {
      PdfObject kid = (PdfObject) kidVec.elementAt(idx);
      try {
        kid = _module.resolveIndirectObject(kid);
      } catch (IOException ignored) {
        // Resolution failure is ignored here; the unresolved entry
        // is simply evaluated as-is by the test below.
      }
      if (!(kid instanceof PdfDictionary)) {
        continue;
      }
      PdfDictionary kidDict = (PdfDictionary) kid;
      if (!isStructElem(kidDict)) {
        continue;
      }
      StructureElement child = new StructureElement(kidDict, _tree);
      child.buildSubtree();
      child.checkAttributes();
      children.add(child);
    }
    // It's possible that none of the array entries were structure
    // elements.  In that case children goes back to null so that
    // users need not test for an empty collection.
    if (children.isEmpty()) {
      children = null;
    }
  }
  /* Sanity-check one attribute dictionary and record what it
     implies for the inline/block bookkeeping (_structIsInline and
     _attrIsBlock).  Any exception raised while inspecting the
     dictionary is reported as a PdfInvalidException. */
  private void checkAttribute(PdfDictionary attr) throws PdfException {
    try {
      // There must be an "O" entry whose value is a name.  The
      // cast is the check: a missing entry or a token of another
      // type raises an exception that is handled below.
      PdfSimpleObject owner = (PdfSimpleObject) attr.get("O");
      Name ownerName = (Name) owner.getToken();

      // A Placement entry with any value other than "Inline"
      // means block level attributes are allowed.
      PdfSimpleObject placementObj = (PdfSimpleObject) attr.get("Placement");
      if (placementObj != null) {
        String placementValue = placementObj.getStringValue();
        if (!"Inline".equals(placementValue)) {
          _structIsInline = false;
        }
      }

      // Though the Adobe PDF bible does not seem to say so
      // outright, the "attributes" appear to be simply the other
      // keys of the attribute dictionary.  Note any that cannot go
      // in BLSE's; whether we are actually in a BLSE is checked
      // later.
      boolean hasBlockLevelAttr = attrIsBlockLevel(attr);
      if (hasBlockLevelAttr) {
        _attrIsBlock = true;
      }
    } catch (Exception e) {
      throw new PdfInvalidException("Invalid attribute in document structure");
    }
  }
// Example #8
// 0
 /* Check a single XObject dictionary.  A null XObject passes
 (no XObject means no problem).  Otherwise it must pass the
 superclass's common tests and must not carry an OPI entry,
 since OPI objects aren't permitted. */
 protected boolean xObjectOK(PdfDictionary xo) {
   return xo == null || (super.xObjectOK(xo) && xo.get("OPI") == null);
 }
  /**
   * Constructor. Records the dictionary and tree, and remembers the element's structure type when
   * it is one of the standard types; otherwise the structure type is left <code>null</code>.
   *
   * @param dict A PdfDictionary corresponding to a structure element
   * @param tree The root StructureTree object
   */
  public StructureElement(PdfDictionary dict, StructureTree tree) throws PdfException {
    _tree = tree;
    _dict = dict;
    _module = tree.getModule();
    _structType = null;

    // Look up the "S" entry, map it through the tree, and keep the
    // result only if it is a standard structure type.
    try {
      PdfSimpleObject typeObj = (PdfSimpleObject) _module.resolveIndirectObject(dict.get("S"));
      Name typeName = (Name) typeObj.getToken();
      String resolvedType = _tree.dereferenceStructType(typeName.getValue());
      if (StdStructTypes.includes(resolvedType)) {
        _structType = resolvedType;
      }
    } catch (IOException ignored) {
      // An unreadable "S" entry just leaves the structure type unset.
    }
  }
  /**
   * Determine if the attributes of this element are valid. If errors are detected, throws a
   * PdfInvalidException.
   *
   * <p>The "A" entry of the element's dictionary may be absent, a single attribute dictionary, or
   * an array of attribute dictionaries optionally interleaved with numeric revision numbers.
   *
   * @throws PdfException if the attribute entry cannot be resolved, has an illegal type, contains
   *     an invalid attribute, or carries block-level attributes on an inline structure element
   */
  public void checkAttributes() throws PdfException {
    final String badattr = "Invalid structure attribute";
    PdfObject attr;

    // Use the variables _structIsInline and _attrIsBlock to
    // note when we've got a block-level-only attribute in
    // an inline structure element. We initially set
    // _structIsInline based on the structure type, but this
    // may be overridden by the Placement attribute.
    // Figure elements occupy an ambiguous position, so we
    // don't mark them as ILSE's.  Also, TR, TH and TD are
    // defined to be neither BLSE's nor ILSE's.
    //
    // _structType is null when the element's type is not one of the
    // standard structure types.  Such an element cannot be
    // classified, so it is not treated as inline; dereferencing the
    // null type here would otherwise fail with a NullPointerException.
    _attrIsBlock = false;
    _structIsInline =
        _structType != null
            && !_structType.equals("Figure")
            && !_structType.equals("TH")
            && !_structType.equals("TD")
            && !_structType.equals("TR")
            && !StdStructTypes.isBlockLevel(_structType);

    try {
      attr = _module.resolveIndirectObject(_dict.get("A"));
    } catch (Exception e) {
      throw new PdfInvalidException("Invalid structure attribute reference");
    }
    if (attr == null) {
      // no attributes is fine
      return;
    }
    if (attr instanceof PdfArray) {
      // If we have an array, it may contain elements and
      // revision numbers.  A revision number may follow
      // an element, but there doesn't have to be one.
      Vector attrVec = ((PdfArray) attr).getContent();
      for (int i = 0; i < attrVec.size(); i++) {
        PdfObject attrElem;
        try {
          attrElem = _module.resolveIndirectObject((PdfObject) attrVec.elementAt(i));
        } catch (IOException e) {
          throw new PdfInvalidException(badattr);
        }
        if (attrElem instanceof PdfDictionary) {
          checkAttribute((PdfDictionary) attrElem);
        } else if (attrElem instanceof PdfSimpleObject) {
          try {
            // The value is not used; the cast itself is the check
            // that a simple object in the array is a number.
            Numeric revnum = (Numeric) ((PdfSimpleObject) attrElem).getToken();
          } catch (Exception e) {
            throw new PdfInvalidException(badattr);
          }
        } else {
          throw new PdfInvalidException(badattr);
        }
      }
    } else if (attr instanceof PdfDictionary) {
      checkAttribute((PdfDictionary) attr);
    } else {
      throw new PdfInvalidException("Structure attribute has illegal type");
    }
    if (_structIsInline && _attrIsBlock) {
      throw new PdfInvalidException("Block-level attributes in inline structure element");
    }
  }
// Example #11
// 0
  /* Walk through the page tree and check all Resources dictionaries
    that we find.  Along the way, we check several things:

    Extended graphic states and XObjects, both in node-level
    resources and in resources attached to content streams.

    Content stream filters (via filterOK) and external streams
    (an F entry in the stream dictionary), which are forbidden.

    TrapNet annotations, whose FontFauxing array must be absent
    or empty.

    Color-space constraints on Separation and DeviceN resources are
    not checked directly in this method.

    Entries that may legally be indirect references are resolved
    before being cast.  Any exception raised during the walk is
    reported as false.
  */
  private boolean resourcesOK() {
    PageTreeNode docTreeRoot = _module.getDocumentTree();
    try {
      docTreeRoot.startWalk();
      DocNode docNode;
      for (; ; ) {
        docNode = docTreeRoot.nextDocNode();
        if (docNode == null) {
          break;
        }
        // Check for node-level resources
        if (!extGStateAndXObjectsOK(docNode.getResources())) {
          return false;
        }

        if (!(docNode instanceof PageObject)) {
          continue;
        }
        PageObject page = (PageObject) docNode;

        // Check content streams for resources
        List streams = page.getContentStreams();
        if (streams != null) {
          Iterator iter = streams.listIterator();
          while (iter.hasNext()) {
            PdfStream stream = (PdfStream) iter.next();
            PdfDictionary dict = stream.getDict();

            // Resources may be given as an indirect reference, so
            // resolve it before casting.
            PdfDictionary rs =
                (PdfDictionary) _module.resolveIndirectObject(dict.get("Resources"));
            if (!extGStateAndXObjectsOK(rs)) {
              return false;
            }

            // Also check for filters, to make sure
            // there aren't any forbidden LZW filters.
            PdfObject filters = dict.get("Filter");
            if (!filterOK(filters, true, true)) {
              return false;
            }

            // External streams are also forbidden.
            if (dict.get("F") != null) {
              return false;
            }
          }
        }

        // Also check page objects for annotations --
        // in particular, TrapNet annotations.
        PdfArray annots = page.getAnnotations();
        if (annots != null) {
          Vector annVec = annots.getContent();
          for (int i = 0; i < annVec.size(); i++) {
            PdfDictionary annDict =
                (PdfDictionary) _module.resolveIndirectObject((PdfObject) annVec.elementAt(i));
            PdfSimpleObject subtypeObj = (PdfSimpleObject) annDict.get("Subtype");
            if (subtypeObj == null) {
              // An annotation without a Subtype fails the check.
              return false;
            }
            if ("TrapNet".equals(subtypeObj.getStringValue())) {
              // FontFauxing must be absent or 0-length.  It may be
              // an indirect reference, so resolve it first.
              PdfArray ff =
                  (PdfArray) _module.resolveIndirectObject(annDict.get("FontFauxing"));
              if (ff != null && ff.getContent().size() > 0) {
                return false; // a faux pas
              }
            }
          }
        }
      }
    } catch (Exception e) {
      return false;
    }
    return true; // passed all tests
  }

  /* Check the ExtGState and XObject entries of one resource
    dictionary.  A null dictionary passes.  Exceptions are left to
    the caller, which treats any failure as nonconformance. */
  private boolean extGStateAndXObjectsOK(PdfDictionary rsrc) throws Exception {
    if (rsrc == null) {
      return true;
    }
    // Check extended graphics state.
    PdfDictionary gs = (PdfDictionary) _module.resolveIndirectObject(rsrc.get("ExtGState"));
    if (!extGStateOK(gs)) {
      return false;
    }
    // Check XObjects.
    PdfDictionary xo = (PdfDictionary) _module.resolveIndirectObject(rsrc.get("XObject"));
    return xObjectsOK(xo);
  }