/**
   * Deletes from their bundles the bitstreams listed in an item archive's delete_contents file.
   *
   * <p>Each id read from the file is looked up with {@code Bitstream.find}; a bitstream that is
   * found is removed from every bundle returned by {@code getBundles()}. When the
   * {@code alterProvenance} flag (declared outside this method) is set, a
   * {@code dc.description.provenance} note is appended to the first item of the first owning
   * bundle. In test mode the same messages are printed but nothing is removed or appended.
   *
   * <p>A missing or empty delete_contents file only produces a warning. A {@link SQLException}
   * raised while processing one id is reported and processing continues with the next id.
   *
   * @param context DSpace context used to look up bitstreams
   * @param itarch item archive whose directory is searched for the delete_contents file
   * @param isTest if true, only report what would be done
   * @param suppressUndo not read by this method
   * @throws IllegalArgumentException declared for callers; not thrown directly here
   * @throws ParseException declared for callers; not thrown directly here
   * @throws IOException propagated from the underlying calls
   * @throws AuthorizeException propagated from the underlying calls
   * @throws SQLException declared for callers; per-id SQL errors are caught and reported
   */
  public void execute(Context context, ItemArchive itarch, boolean isTest, boolean suppressUndo)
      throws IllegalArgumentException, IOException, SQLException, AuthorizeException,
          ParseException {
    File f = new File(itarch.getDirectory(), ItemUpdate.DELETE_CONTENTS_FILE);
    if (!f.exists()) {
      ItemUpdate.pr(
          "Warning: Delete_contents file for item " + itarch.getDirectoryName() + " not found.");
      return;
    }

    List<Integer> list = MetadataUtilities.readDeleteContentsFile(f);
    if (list.isEmpty()) {
      ItemUpdate.pr("Warning: empty delete_contents file for item " + itarch.getDirectoryName());
      return;
    }

    for (int id : list) {
      try {
        Bitstream bs = Bitstream.find(context, id);
        if (bs == null) {
          ItemUpdate.pr("Bitstream not found by id: " + id);
          continue;
        }

        Bundle[] bundles = bs.getBundles();
        for (Bundle b : bundles) {
          if (isTest) {
            ItemUpdate.pr("Delete bitstream with id = " + id);
          } else {
            b.removeBitstream(bs);
            ItemUpdate.pr("Deleted bitstream with id = " + id);
          }
        }

        if (!alterProvenance) {
          continue;
        }

        // The provenance note goes on the first item of the first owning bundle. A bitstream
        // without a bundle (or a bundle without an item) has nowhere to record it, so warn
        // instead of failing on an out-of-range array index.
        Item[] owners = (bundles.length > 0) ? bundles[0].getItems() : null;
        if (owners == null || owners.length == 0) {
          ItemUpdate.pr(
              "Warning: no owning item found for bitstream with id = "
                  + id
                  + "; provenance not updated.");
          continue;
        }

        DtoMetadata dtom = DtoMetadata.create("dc.description.provenance", "en", "");
        String append = "Bitstream " + bs.getName() + " deleted on " + DCDate.getCurrent() + "; ";
        Item item = owners[0];
        ItemUpdate.pr("Append provenance with: " + append);

        if (!isTest) {
          MetadataUtilities.appendMetadata(item, dtom, false, append);
        }
      } catch (SQLException e) {
        ItemUpdate.pr("Error finding bitstream from id: " + id + " : " + e.toString());
      }
    }
  }
  /**
   * Process Item, correcting CC-License if encountered.
   *
   * <p>Looks at the first {@code CC-LICENSE} bundle of the item and its {@code license_rdf}
   * bitstream. The bitstream content is stripped of the first {@code <license} and
   * {@code </license>} occurrences, run through the {@code templates} transformation, and stored
   * as a new bitstream that copies the name, description, format, source and user format
   * description of the old one. The old bitstream is then removed from the bundle.
   *
   * <p>Items without a {@code CC-LICENSE} bundle, or whose bundle has no {@code license_rdf}
   * bitstream, are left untouched.
   *
   * @param item item to inspect
   * @throws SQLException propagated from the underlying calls
   * @throws AuthorizeException propagated from the underlying calls
   * @throws IOException propagated from the underlying calls
   * @throws RuntimeException wrapping a {@link TransformerException} if the transformation fails
   */
  protected static void handleItem(Item item) throws SQLException, AuthorizeException, IOException {
    Bundle[] bundles = item.getBundles("CC-LICENSE");
    if (bundles == null || bundles.length == 0) {
      return;
    }

    Bundle bundle = bundles[0];

    Bitstream bitstream = bundle.getBitstreamByName("license_rdf");
    if (bitstream == null) {
      // A CC-LICENSE bundle without a license_rdf bitstream has nothing to correct.
      return;
    }

    // NOTE(review): the byte <-> String conversions below use the platform default charset,
    // exactly as before; confirm whether the license RDF should be handled as UTF-8.
    String license_rdf = new String(copy(bitstream));

    /* quickly fix xml by ripping out offensive parts */
    license_rdf = license_rdf.replaceFirst("<license", "");
    license_rdf = license_rdf.replaceFirst("</license>", "");

    StringWriter result = new StringWriter();

    try {
      templates
          .newTransformer()
          .transform(
              new StreamSource(new ByteArrayInputStream(license_rdf.getBytes())),
              new StreamResult(result));
    } catch (TransformerException e) {
      throw new RuntimeException(e.getMessage(), e);
    }

    Bitstream newBitstream =
        bundle.createBitstream(new ByteArrayInputStream(result.toString().getBytes()));

    // Carry the descriptive properties of the old bitstream over to its replacement.
    newBitstream.setName(bitstream.getName());
    newBitstream.setDescription(bitstream.getDescription());
    newBitstream.setFormat(bitstream.getFormat());
    newBitstream.setSource(bitstream.getSource());
    newBitstream.setUserFormatDescription(bitstream.getUserFormatDescription());
    newBitstream.update();

    bundle.removeBitstream(bitstream);

    bundle.update();
  }
  /**
   * Create a new DSpace item out of a METS content package. All contents are dictated by the METS
   * manifest. Package is a ZIP archive, all files relative to top level and the manifest (as per
   * spec) in mets.xml.
   *
   * <p>The ingest runs in four steps: (1) every Zip entry is stored as a bitstream, (2) the file
   * list of the manifest is compared with the package contents, (3) item and bitstream metadata
   * are crosswalked, and (4) the primary bitstream and the license are set. If anything fails
   * after the workspace item was created, the workspace item is deleted again, except on a
   * {@link SQLException}, where cleanup is left to the transaction rollback.
   *
   * @param context - DSpace context.
   * @param collection - collection under which to create new item.
   * @param pkg - input stream containing package to ingest.
   * @param params - package parameters; the boolean property "validate" (default true) is passed
   *     on to the manifest parser.
   * @param license - may be null, which takes default license.
   * @return workspace item created by ingest.
   * @throws PackageValidationException if package is unacceptable or there is a fatal error turning
   *     it into an Item.
   * @throws CrosswalkException propagated from the underlying calls.
   * @throws AuthorizeException propagated from the underlying calls.
   * @throws SQLException rethrown after disabling the workspace item cleanup.
   * @throws IOException propagated from the underlying calls.
   */
  public WorkspaceItem ingest(
      Context context,
      Collection collection,
      InputStream pkg,
      PackageParameters params,
      String license)
      throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException,
          IOException {
    BitstreamDAO bsDAO = BitstreamDAOFactory.getInstance(context);
    BitstreamFormatDAO bfDAO = BitstreamFormatDAOFactory.getInstance(context);
    BundleDAO bundleDAO = BundleDAOFactory.getInstance(context);
    WorkspaceItemDAO wsiDAO = WorkspaceItemDAOFactory.getInstance(context);

    ZipInputStream zip = new ZipInputStream(pkg);
    // Maps the ID attribute of a manifest <file> element to the bitstream holding that file.
    HashMap<String, Bitstream> fileIdToBitstream = new HashMap<String, Bitstream>();
    WorkspaceItem wi = null;
    boolean success = false;
    // Names of Zip entries stored as bitstreams; matched entries are removed again in step 2.
    HashSet<String> packageFiles = new HashSet<String>();

    boolean validate = params.getBooleanProperty("validate", true);

    try {
      /* 1. Read all the files in the Zip into bitstreams first,
       *  because we only get to take one pass through a Zip input
       *  stream.  Give them temporary bitstream names corresponding
       *  to the same names they had in the Zip, since those MUST
       *  match the URL references in <Flocat> and <mdRef> elements.
       */
      METSManifest manifest = null;
      wi = wsiDAO.create(collection, false);
      Item item = wi.getItem();
      Bundle contentBundle = item.createBundle(Constants.CONTENT_BUNDLE_NAME);
      Bundle mdBundle = null;
      ZipEntry ze;
      while ((ze = zip.getNextEntry()) != null) {
        if (ze.isDirectory()) {
          continue;
        }
        Bitstream bs = null;
        String fname = ze.getName();
        if (fname.equals(MANIFEST_FILE)) {
          if (preserveManifest) {
            mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME);
            bs = mdBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip));
            bs.setName(fname);
            bs.setSource(fname);

            // Get magic bitstream format to identify manifest.
            BitstreamFormat manifestFormat =
                PackageUtils.findOrCreateBitstreamFormat(
                    context,
                    MANIFEST_BITSTREAM_FORMAT,
                    "application/xml",
                    MANIFEST_BITSTREAM_FORMAT + " package manifest");
            bs.setFormat(manifestFormat);

            manifest = METSManifest.create(bs.retrieve(), validate);
          } else {
            // Manifest is parsed straight from the Zip and not kept as a bitstream.
            manifest = METSManifest.create(new PackageUtils.UnclosableInputStream(zip), validate);
            continue;
          }
        } else {
          bs = contentBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip));
          bs.setSource(fname);
          bs.setName(fname);
        }
        packageFiles.add(fname);
        bsDAO.update(bs);
      }
      zip.close();

      if (manifest == null) {
        throw new PackageValidationException(
            "No METS Manifest found (filename=" + MANIFEST_FILE + ").  Package is unacceptable.");
      }

      // initial sanity checks on manifest (in subclass)
      checkManifest(manifest);

      /* 2. Grovel a file list out of METS Manifest and compare
       *  it to the files in package, as an integrity test.
       */
      List manifestContentFiles = manifest.getContentFiles();

      // Compare manifest files with the ones found in package:
      //  a. Start with content files (mentioned in <fileGrp>s)
      HashSet<String> missingFiles = new HashSet<String>();
      for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); ) {
        // First locate corresponding Bitstream and make
        // map of Bitstream to <file> ID.
        Element mfile = (Element) mi.next();
        String mfileId = mfile.getAttributeValue("ID");
        if (mfileId == null) {
          throw new PackageValidationException(
              "Invalid METS Manifest: file element without ID attribute.");
        }
        String path = METSManifest.getFileName(mfile);
        Bitstream bs = contentBundle.getBitstreamByName(path);
        if (bs == null) {
          log.warn(
              "Cannot find bitstream for filename=\""
                  + path
                  + "\", skipping it..may cause problems later.");
          missingFiles.add(path);
        } else {
          fileIdToBitstream.put(mfileId, bs);

          // Now that we're done using Name to match to <file>,
          // set default bitstream Name to last path element;
          // Zip entries all have '/' pathname separators
          // NOTE: set default here, hopefully crosswalk of
          // a bitstream techMD section will override it.
          String fname = bs.getName();
          int lastSlash = fname.lastIndexOf('/');
          if (lastSlash >= 0 && lastSlash + 1 < fname.length()) {
            bs.setName(fname.substring(lastSlash + 1));
          }

          // Set Default bitstream format:
          //  1. attempt to guess from MIME type
          //  2. if that fails, guess from "name" extension.
          String mimeType = mfile.getAttributeValue("MIMETYPE");
          BitstreamFormat bf = (mimeType == null) ? null : bfDAO.retrieveByMimeType(mimeType);
          if (bf == null) {
            bf = FormatIdentifier.guessFormat(context, bs);
          }
          bs.setFormat(bf);

          // if this bitstream belongs in another Bundle, move it:
          String bundleName = manifest.getBundleName(mfile);
          if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME)) {
            Bundle bn;
            Bundle bns[] = item.getBundles(bundleName);
            if (bns != null && bns.length > 0) {
              bn = bns[0];
            } else {
              bn = item.createBundle(bundleName);
            }
            bn.addBitstream(bs);
            contentBundle.removeBitstream(bs);
          }

          // finally, build compare lists by deleting matches.
          if (packageFiles.contains(path)) {
            packageFiles.remove(path);
          } else {
            missingFiles.add(path);
          }
        }
      }

      //  b. Process files mentioned in <mdRef>s - check and move
      //     to METADATA bundle.
      for (Iterator mi = manifest.getMdFiles().iterator(); mi.hasNext(); ) {
        Element mdref = (Element) mi.next();
        String path = METSManifest.getFileName(mdref);

        // finally, build compare lists by deleting matches.
        if (packageFiles.contains(path)) {
          packageFiles.remove(path);
        } else {
          missingFiles.add(path);
        }

        // if there is a bitstream with that name in Content, move
        // it to the Metadata bundle:
        Bitstream mdbs = contentBundle.getBitstreamByName(path);
        if (mdbs != null) {
          if (mdBundle == null) {
            mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME);
          }
          mdBundle.addBitstream(mdbs);
          contentBundle.removeBitstream(mdbs);
        }
      }

      // KLUDGE: make sure Manifest file doesn't get flagged as missing
      // or extra, since it won't be mentioned in the manifest.
      if (packageFiles.contains(MANIFEST_FILE)) {
        packageFiles.remove(MANIFEST_FILE);
      }

      // Give subclass a chance to refine the lists of in-package
      // and missing files, delete extraneous files, etc.
      checkPackageFiles(packageFiles, missingFiles, manifest);

      // Any discrepancy in file lists is a fatal error:
      if (!(packageFiles.isEmpty() && missingFiles.isEmpty())) {
        StringBuffer msg =
            new StringBuffer("Package is unacceptable: contents do not match manifest.");
        if (!missingFiles.isEmpty()) {
          msg.append("\nPackage is missing these files listed in Manifest:");
          for (String missing : missingFiles) {
            msg.append("\n\t").append(missing);
          }
        }
        if (!packageFiles.isEmpty()) {
          msg.append("\nPackage contains extra files NOT in manifest:");
          for (String extra : packageFiles) {
            msg.append("\n\t").append(extra);
          }
        }
        throw new PackageValidationException(msg.toString());
      }

      /* 3. crosswalk the metadata
       */
      // get mdref'd streams from "callback" object.
      MdrefManager callback = new MdrefManager(mdBundle);

      chooseItemDmd(context, item, manifest, callback, manifest.getItemDmds());

      // crosswalk content bitstreams too.
      for (Map.Entry<String, Bitstream> ee : fileIdToBitstream.entrySet()) {
        manifest.crosswalkBitstream(context, ee.getValue(), ee.getKey(), callback);
      }

      // Take a second pass over files to correct names of derived files
      // (e.g. thumbnails, extracted text) to what DSpace expects:
      for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); ) {
        Element mfile = (Element) mi.next();
        String bundleName = manifest.getBundleName(mfile);
        if (bundleName.equals(Constants.CONTENT_BUNDLE_NAME)) {
          continue;
        }
        Element origFile = manifest.getOriginalFile(mfile);
        if (origFile == null) {
          continue;
        }

        // A <file> whose bitstream was not found in step 2 has no map entry, so either
        // lookup below may come back null; skip the rename instead of dereferencing it.
        String ofileId = origFile.getAttributeValue("ID");
        Bitstream obs = fileIdToBitstream.get(ofileId);
        if (obs == null) {
          log.warn(
              "Got original file ID="
                  + ofileId
                  + " for a derived file, but found no corresponding bitstream; not renaming.");
          continue;
        }

        String newName = makeDerivedFilename(bundleName, obs.getName());
        if (newName != null) {
          String mfileId = mfile.getAttributeValue("ID");
          Bitstream bs = fileIdToBitstream.get(mfileId);
          if (bs == null) {
            log.warn(
                "Got derived file ID="
                    + mfileId
                    + ", but found no corresponding bitstream; not renaming.");
          } else {
            bs.setName(newName);
            bsDAO.update(bs);
          }
        }
      }

      // Sanity-check the resulting metadata on the Item:
      PackageUtils.checkMetadata(item);

      /* 4. Set primary bitstream; same Bundle
       */
      Element pbsFile = manifest.getPrimaryBitstream();
      if (pbsFile != null) {
        Bitstream pbs = fileIdToBitstream.get(pbsFile.getAttributeValue("ID"));
        if (pbs == null) {
          log.error(
              "Got Primary Bitstream file ID="
                  + pbsFile.getAttributeValue("ID")
                  + ", but found no corresponding bitstream.");
        } else {
          List<Bundle> bn = bundleDAO.getBundles(pbs);
          if (bn.size() > 0) {
            bn.get(0).setPrimaryBitstreamID(pbs.getID());
          } else {
            log.error("Sanity check, got primary bitstream without any parent bundle.");
          }
        }
      }

      // have subclass manage license since it may be extra package file.
      addLicense(context, collection, item, manifest, callback, license);

      // subclass hook for final checks and rearrangements
      finishItem(context, item);

      // commit any changes to bundles
      Bundle allBn[] = item.getBundles();
      for (int i = 0; i < allBn.length; ++i) {
        bundleDAO.update(allBn[i]);
      }

      wsiDAO.update(wi);
      success = true;
      log.info(
          LogManager.getHeader(
              context,
              "ingest",
              "Created new Item, db ID="
                  + String.valueOf(item.getID())
                  + ", WorkspaceItem ID="
                  + String.valueOf(wi.getID())));
      return wi;
    } catch (SQLException se) {
      // disable attempt to delete the workspace object, since
      // database may have suffered a fatal error and the
      // transaction rollback will get rid of it anyway.
      wi = null;

      // Pass this exception on to the next handler.
      throw se;
    } finally {
      // kill item (which also deletes bundles, bitstreams) if ingest fails
      if (!success && wi != null) {
        wsiDAO.deleteAll(wi.getID());
      }
    }
  }
  // Example no. 4
  /**
   * Create new Item out of the ingested package, in the indicated collection. It creates a
   * workspace item, which the application can then install if it chooses to bypass Workflow.
   *
   * <p>This is a VERY crude import of a single Adobe PDF (Portable Document Format) file, using the
   * document's embedded metadata for package metadata. If the PDF file hasn't got the minimal
   * metadata available, it is rejected.
   *
   * <p>The package stream is stored as the bitstream {@code package.pdf} in an {@code ORIGINAL}
   * bundle and closed, then the stored bitstream is handed to {@code crosswalkPDF}. If the ingest
   * fails, the bitstream and the workspace item are removed again. The context is committed in
   * every case.
   *
   * @param context DSpace context.
   * @param collection collection under which to create new item.
   * @param pkg input stream containing package to ingest; closed by this method.
   * @param params package parameters (none recognized)
   * @param license may be null, which takes default license.
   * @return workspace item created by ingest.
   * @throws PackageValidationException if package is unacceptable or there is a fatal error
   *     turning it into an Item.
   * @throws CrosswalkException propagated from the underlying calls.
   * @throws AuthorizeException propagated from the underlying calls.
   * @throws SQLException propagated from the underlying calls.
   * @throws IOException propagated from the underlying calls.
   */
  public WorkspaceItem ingest(
      Context context,
      Collection collection,
      InputStream pkg,
      PackageParameters params,
      String license)
      throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException,
          IOException {
    boolean success = false;
    Bundle original = null;
    Bitstream bs = null;
    WorkspaceItem wi = null;

    try {
      // Save the PDF in a bitstream first, since the parser
      // has to read it as well, and we cannot "rewind" it after that.
      wi = WorkspaceItem.create(context, collection, false);
      Item myitem = wi.getItem();
      original = myitem.createBundle("ORIGINAL");
      bs = original.createBitstream(pkg);
      pkg.close();
      bs.setName("package.pdf");
      setFormatToMIMEType(context, bs, "application/pdf");
      bs.update();
      log.debug("Created bitstream ID=" + String.valueOf(bs.getID()) + ", parsing...");

      crosswalkPDF(context, myitem, bs.retrieve());

      wi.update();
      context.commit();
      success = true;
      log.info(
          LogManager.getHeader(
              context,
              "ingest",
              "Created new Item, db ID="
                  + String.valueOf(myitem.getID())
                  + ", WorkspaceItem ID="
                  + String.valueOf(wi.getID())));
      return wi;
    } finally {
      // get rid of bitstream and item if ingest fails
      if (!success) {
        if (original != null && bs != null) {
          original.removeBitstream(bs);
        }
        if (wi != null) {
          wi.deleteAll();
        }
      }
      // Commit runs on both paths; after a failure it persists the cleanup above.
      context.commit();
    }
  }