Example #1
0
  /**
   * Adds one content file from disk to an item as a new bitstream.
   *
   * <p>The file at {@code path + File.separatorChar + fileName} is opened and, unless the importer
   * is in test mode ({@code isTest}), stored as a bitstream in the bundle called {@code
   * bundleName}. When {@code bundleName} is {@code null}, the bundle defaults to {@code "LICENSE"}
   * for a file named exactly {@code license.txt} and to {@code "ORIGINAL"} otherwise. The first
   * existing bundle with that name is reused; a new one is created when the item has none.
   *
   * <p>In test mode the file is still opened (so a missing file still fails with an {@link
   * IOException}) but nothing is created or updated.
   *
   * @param c context passed to the format identifier
   * @param i item that receives the bitstream
   * @param path directory containing the file
   * @param fileName name of the file inside {@code path}; also used as the bitstream name
   * @param bundleName target bundle name, or {@code null} to select a default as described above
   * @param primary {@code true} to mark the new bitstream as the bundle's primary bitstream
   * @throws SQLException propagated from the item, bundle and bitstream operations
   * @throws IOException if the file cannot be opened or closed, or propagated from bitstream
   *     creation
   * @throws AuthorizeException propagated from the item, bundle and bitstream operations
   */
  private void processContentFileEntry(
      Context c, Item i, String path, String fileName, String bundleName, boolean primary)
      throws SQLException, IOException, AuthorizeException {
    String fullpath = path + File.separatorChar + fileName;

    String newBundleName = bundleName;
    if (bundleName == null) {
      // license.txt goes to LICENSE, everything else to ORIGINAL
      newBundleName = "license.txt".equals(fileName) ? "LICENSE" : "ORIGINAL";
    }

    // get an input stream; it is opened even in test mode so that a missing file is detected
    BufferedInputStream bis = new BufferedInputStream(new FileInputStream(fullpath));
    try {
      if (!isTest) {
        // find the bundle
        Bundle[] bundles = i.getBundles(newBundleName);
        Bundle targetBundle;

        if (bundles.length < 1) {
          // not found, create a new one
          targetBundle = i.createBundle(newBundleName);
        } else {
          // put bitstreams into first bundle
          targetBundle = bundles[0];
        }

        // now add the bitstream
        Bitstream bs = targetBundle.createBitstream(bis);

        bs.setName(fileName);

        // Identify the format
        // FIXME - guessing format guesses license.txt incorrectly as a text
        // file format!
        BitstreamFormat bf = FormatIdentifier.guessFormat(c, bs);
        bs.setFormat(bf);

        // Is this the primary bitstream?
        if (primary) {
          targetBundle.setPrimaryBitstreamID(bs.getID());
          targetBundle.update();
        }

        bs.update();
      }
    } finally {
      // always release the file handle, including when bitstream creation fails
      bis.close();
    }
  }
Example #2
0
  /**
   * Registers a bitstream file with DSpace and attaches it to an item.
   *
   * <p>If {@code bundleName} is {@code null}, the bundle is {@code "LICENSE"} when {@code
   * bitstreamPath} ends with {@code license.txt} and {@code "ORIGINAL"} otherwise. In test mode
   * ({@code isTest}) nothing further happens. Otherwise the first bundle of that name is used
   * (created if the item has none), the bitstream is registered in it, named after the part of the
   * path following the last {@code '/'}, given a guessed format, and updated.
   *
   * @param c context passed to the format identifier
   * @param i item that receives the bitstream
   * @param assetstore asset store number handed to {@code Bundle.registerBitstream}
   * @param bitstreamPath the full filepath expressed in the contents file
   * @param bundleName target bundle name, or {@code null} to select a default as described above
   * @throws SQLException propagated from the item, bundle and bitstream operations
   * @throws IOException propagated from the item, bundle and bitstream operations
   * @throws AuthorizeException propagated from the item, bundle and bitstream operations
   */
  private void registerBitstream(
      Context c, Item i, int assetstore, String bitstreamPath, String bundleName)
      throws SQLException, IOException, AuthorizeException {
    // TODO validate assetstore number
    // TODO make sure the bitstream is there

    final String resolvedBundleName;
    if (bundleName != null) {
      resolvedBundleName = bundleName;
    } else {
      // no explicit bundle: license files go to LICENSE, the rest to ORIGINAL
      resolvedBundleName = bitstreamPath.endsWith("license.txt") ? "LICENSE" : "ORIGINAL";
    }

    // a test run stops before touching the item
    if (isTest) {
      return;
    }

    // reuse the first matching bundle, or create one when the item has none
    Bundle[] existing = i.getBundles(resolvedBundleName);
    Bundle destination = (existing.length < 1) ? i.createBundle(resolvedBundleName) : existing[0];

    Bitstream registered = destination.registerBitstream(assetstore, bitstreamPath);

    // the bitstream name is just the filename portion of the path
    String baseName = bitstreamPath.substring(bitstreamPath.lastIndexOf('/') + 1);
    registered.setName(baseName);

    // FIXME - guessing format guesses license.txt incorrectly as a text file format!
    registered.setFormat(FormatIdentifier.guessFormat(c, registered));

    registered.update();
  }
  /**
   * Create a new DSpace item out of a METS content package. All contents are dictated by the METS
   * manifest. Package is a ZIP archive, all files relative to top level and the manifest (as per
   * spec) in mets.xml.
   *
   * <p>The work is done in four steps: (1) every Zip entry is read into a bitstream, (2) the file
   * list in the manifest is compared with the files found in the package, (3) metadata is
   * crosswalked onto the item and its bitstreams, and (4) the primary bitstream is set. Subclass
   * hooks ({@code checkManifest}, {@code checkPackageFiles}, {@code chooseItemDmd}, {@code
   * addLicense}, {@code finishItem}) are invoked along the way.
   *
   * <p>If ingest does not complete, the workspace item created here is deleted again, except after
   * an {@link SQLException}, where deletion is skipped and the exception is rethrown.
   *
   * @param context - DSpace context.
   * @param collection - collection under which to create new item.
   * @param pkg - input stream containing package to ingest.
   * @param params - packager parameters; the boolean property {@code "validate"} (read with {@code
   *     true} as the second argument) is passed on to {@code METSManifest.create}.
   * @param license - may be null, which takes default license.
   * @return workspace item created by ingest.
   * @throws PackageValidationException if package is unacceptable or there is a fatal error turning
   *     it into an Item.
   * @throws CrosswalkException propagated from the metadata crosswalk steps.
   * @throws AuthorizeException propagated from the item, bundle and bitstream operations.
   * @throws SQLException rethrown unchanged; the workspace item is not deleted in this case.
   * @throws IOException propagated from reading the Zip stream or the bitstream operations.
   */
  public WorkspaceItem ingest(
      Context context,
      Collection collection,
      InputStream pkg,
      PackageParameters params,
      String license)
      throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException,
          IOException {
    BitstreamDAO bsDAO = BitstreamDAOFactory.getInstance(context);
    BitstreamFormatDAO bfDAO = BitstreamFormatDAOFactory.getInstance(context);
    BundleDAO bundleDAO = BundleDAOFactory.getInstance(context);
    WorkspaceItemDAO wsiDAO = WorkspaceItemDAOFactory.getInstance(context);

    ZipInputStream zip = new ZipInputStream(pkg);
    // METS <file> ID -> bitstream created for that file
    Map<String, Bitstream> fileIdToBitstream = new HashMap<String, Bitstream>();
    WorkspaceItem wi = null;
    boolean success = false;
    // names of Zip entries that have not (yet) been matched to the manifest
    HashSet<String> packageFiles = new HashSet<String>();

    boolean validate = params.getBooleanProperty("validate", true);

    try {
      /* 1. Read all the files in the Zip into bitstreams first,
       *  because we only get to take one pass through a Zip input
       *  stream.  Give them temporary bitstream names corresponding
       *  to the same names they had in the Zip, since those MUST
       *  match the URL references in <Flocat> and <mdRef> elements.
       */
      METSManifest manifest = null;
      wi = wsiDAO.create(collection, false);
      Item item = wi.getItem();
      Bundle contentBundle = item.createBundle(Constants.CONTENT_BUNDLE_NAME);
      Bundle mdBundle = null;
      ZipEntry ze;
      while ((ze = zip.getNextEntry()) != null) {
        if (ze.isDirectory()) {
          continue;
        }
        Bitstream bs = null;
        String fname = ze.getName();
        if (fname.equals(MANIFEST_FILE)) {
          if (preserveManifest) {
            mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME);
            bs = mdBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip));
            bs.setName(fname);

            // Get magic bitstream format to identify manifest.
            BitstreamFormat manifestFormat =
                PackageUtils.findOrCreateBitstreamFormat(
                    context,
                    MANIFEST_BITSTREAM_FORMAT,
                    "application/xml",
                    MANIFEST_BITSTREAM_FORMAT + " package manifest");
            bs.setFormat(manifestFormat);

            manifest = METSManifest.create(bs.retrieve(), validate);
          } else {
            // manifest is parsed straight from the Zip and not kept as a bitstream
            manifest = METSManifest.create(new PackageUtils.UnclosableInputStream(zip), validate);
            continue;
          }
        } else {
          bs = contentBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip));
          bs.setName(fname);
        }
        packageFiles.add(fname);
        // source is set once here for both the manifest and the content bitstreams
        bs.setSource(fname);
        bsDAO.update(bs);
      }
      zip.close();

      if (manifest == null) {
        throw new PackageValidationException(
            "No METS Manifest found (filename=" + MANIFEST_FILE + ").  Package is unacceptable.");
      }

      // initial sanity checks on manifest (in subclass)
      checkManifest(manifest);

      /* 2. Grovel a file list out of METS Manifest and compare
       *  it to the files in package, as an integrity test.
       */
      List manifestContentFiles = manifest.getContentFiles();

      // Compare manifest files with the ones found in package:
      //  a. Start with content files (mentioned in <fileGrp>s)
      HashSet<String> missingFiles = new HashSet<String>();
      for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); ) {
        // First locate corresponding Bitstream and make
        // map of Bitstream to <file> ID.
        Element mfile = (Element) mi.next();
        String mfileId = mfile.getAttributeValue("ID");
        if (mfileId == null) {
          throw new PackageValidationException(
              "Invalid METS Manifest: file element without ID attribute.");
        }
        String path = METSManifest.getFileName(mfile);
        Bitstream bs = contentBundle.getBitstreamByName(path);
        if (bs == null) {
          log.warn(
              "Cannot find bitstream for filename=\""
                  + path
                  + "\", skipping it..may cause problems later.");
          missingFiles.add(path);
        } else {
          fileIdToBitstream.put(mfileId, bs);

          // Now that we're done using Name to match to <file>,
          // set default bitstream Name to last path element;
          // Zip entries all have '/' pathname separators
          // NOTE: set default here, hopefully crosswalk of
          // a bitstream techMD section will override it.
          String fname = bs.getName();
          int lastSlash = fname.lastIndexOf('/');
          if (lastSlash >= 0 && lastSlash + 1 < fname.length()) {
            bs.setName(fname.substring(lastSlash + 1));
          }

          // Set Default bitstream format:
          //  1. attempt to guess from MIME type
          //  2. if that fails, guess from "name" extension.
          String mimeType = mfile.getAttributeValue("MIMETYPE");
          BitstreamFormat bf = (mimeType == null) ? null : bfDAO.retrieveByMimeType(mimeType);
          if (bf == null) {
            bf = FormatIdentifier.guessFormat(context, bs);
          }
          bs.setFormat(bf);

          // if this bitstream belongs in another Bundle, move it:
          String bundleName = manifest.getBundleName(mfile);
          if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME)) {
            Bundle bn;
            Bundle bns[] = item.getBundles(bundleName);
            if (bns != null && bns.length > 0) {
              bn = bns[0];
            } else {
              bn = item.createBundle(bundleName);
            }
            bn.addBitstream(bs);
            contentBundle.removeBitstream(bs);
          }

          // finally, build compare lists by deleting matches.
          if (!packageFiles.remove(path)) {
            missingFiles.add(path);
          }
        }
      }

      //  b. Process files mentioned in <mdRef>s - check and move
      //     to METADATA bundle.
      for (Iterator mi = manifest.getMdFiles().iterator(); mi.hasNext(); ) {
        Element mdref = (Element) mi.next();
        String path = METSManifest.getFileName(mdref);

        // finally, build compare lists by deleting matches.
        if (!packageFiles.remove(path)) {
          missingFiles.add(path);
        }

        // if there is a bitstream with that name in Content, move
        // it to the Metadata bundle:
        Bitstream mdbs = contentBundle.getBitstreamByName(path);
        if (mdbs != null) {
          if (mdBundle == null) {
            mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME);
          }
          mdBundle.addBitstream(mdbs);
          contentBundle.removeBitstream(mdbs);
        }
      }

      // KLUDGE: make sure Manifest file doesn't get flagged as missing
      // or extra, since it won't be mentioned in the manifest.
      packageFiles.remove(MANIFEST_FILE);

      // Give subclass a chance to refine the lists of in-package
      // and missing files, delete extraneous files, etc.
      checkPackageFiles(packageFiles, missingFiles, manifest);

      // Any discrepancy in file lists is a fatal error:
      if (!(packageFiles.isEmpty() && missingFiles.isEmpty())) {
        StringBuilder msg =
            new StringBuilder("Package is unacceptable: contents do not match manifest.");
        if (!missingFiles.isEmpty()) {
          msg.append("\nPackage is missing these files listed in Manifest:");
          for (String missing : missingFiles) {
            msg.append("\n\t").append(missing);
          }
        }
        if (!packageFiles.isEmpty()) {
          msg.append("\nPackage contains extra files NOT in manifest:");
          for (String extra : packageFiles) {
            msg.append("\n\t").append(extra);
          }
        }
        throw new PackageValidationException(msg.toString());
      }

      /* 3. crosswalk the metadata
       */
      // get mdref'd streams from "callback" object.
      MdrefManager callback = new MdrefManager(mdBundle);

      chooseItemDmd(context, item, manifest, callback, manifest.getItemDmds());

      // crosswalk content bitstreams too.
      for (Map.Entry<String, Bitstream> ee : fileIdToBitstream.entrySet()) {
        manifest.crosswalkBitstream(context, ee.getValue(), ee.getKey(), callback);
      }

      // Take a second pass over files to correct names of derived files
      // (e.g. thumbnails, extracted text) to what DSpace expects:
      for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); ) {
        Element mfile = (Element) mi.next();
        String bundleName = manifest.getBundleName(mfile);
        if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME)) {
          Element origFile = manifest.getOriginalFile(mfile);
          if (origFile != null) {
            String ofileId = origFile.getAttributeValue("ID");
            Bitstream obs = fileIdToBitstream.get(ofileId);
            if (obs == null) {
              // The original file had no bitstream in the package (the first pass only
              // warns about that, and checkPackageFiles may have accepted it), so there
              // is no name to derive from.
              log.warn(
                  "Cannot find bitstream for original file ID=\""
                      + ofileId
                      + "\", not renaming derived file ID=\""
                      + mfile.getAttributeValue("ID")
                      + "\".");
              continue;
            }
            String newName = makeDerivedFilename(bundleName, obs.getName());
            if (newName != null) {
              String mfileId = mfile.getAttributeValue("ID");
              Bitstream bs = fileIdToBitstream.get(mfileId);
              if (bs == null) {
                // same situation for the derived file itself: nothing to rename
                log.warn(
                    "Cannot find bitstream for derived file ID=\""
                        + mfileId
                        + "\", not renaming it.");
              } else {
                bs.setName(newName);
                bsDAO.update(bs);
              }
            }
          }
        }
      }

      // Sanity-check the resulting metadata on the Item:
      PackageUtils.checkMetadata(item);

      /* 4. Set primary bitstream; same Bundle
       */
      Element pbsFile = manifest.getPrimaryBitstream();
      if (pbsFile != null) {
        Bitstream pbs = fileIdToBitstream.get(pbsFile.getAttributeValue("ID"));
        if (pbs == null) {
          log.error(
              "Got Primary Bitstream file ID="
                  + pbsFile.getAttributeValue("ID")
                  + ", but found no corresponding bitstream.");
        } else {
          List<Bundle> bn = bundleDAO.getBundles(pbs);
          if (bn.size() > 0) {
            bn.get(0).setPrimaryBitstreamID(pbs.getID());
          } else {
            log.error("Sanity check, got primary bitstream without any parent bundle.");
          }
        }
      }

      // have subclass manage license since it may be extra package file.
      addLicense(context, collection, item, manifest, callback, license);

      // subclass hook for final checks and rearrangements
      finishItem(context, item);

      // commit any changes to bundles
      Bundle allBn[] = item.getBundles();
      for (int i = 0; i < allBn.length; ++i) {
        bundleDAO.update(allBn[i]);
      }

      wsiDAO.update(wi);
      success = true;
      log.info(
          LogManager.getHeader(
              context,
              "ingest",
              "Created new Item, db ID="
                  + String.valueOf(item.getID())
                  + ", WorkspaceItem ID="
                  + String.valueOf(wi.getID())));
      return wi;
    } catch (SQLException se) {
      // disable attempt to delete the workspace object, since
      // database may have suffered a fatal error and the
      // transaction rollback will get rid of it anyway.
      wi = null;

      // Pass this exception on to the next handler.
      throw se;
    } finally {
      // kill item (which also deletes bundles, bitstreams) if ingest fails
      if (!success && wi != null) {
        wsiDAO.deleteAll(wi.getID());
      }
    }
  }
Example #4
0
  /**
   * Process the upload of a new file!
   *
   * <p>The file itself is expected to be uploaded already. Each request attribute whose name ends
   * with {@code "-path"} denotes one uploaded file; the text before that suffix is the upload
   * parameter name, and the attributes {@code <param>-inputstream} and {@code
   * <param>-description} carry the content and description. For each such file a bitstream is
   * created in the item's {@code ORIGINAL} bundle, its format is guessed, and (if enabled by the
   * {@code submission-curation} / {@code virus-scan} setting) a virus scan is run. The method
   * returns as soon as one file produces a non-complete status.
   *
   * @param context current DSpace context
   * @param request current servlet request object
   * @param response current servlet response object
   * @param subInfo submission info object
   * @return Status or error flag which will be processed by UI-related code! (if STATUS_COMPLETE or
   *     0 is returned, no errors occurred!)
   * @throws ServletException declared for callers; not thrown directly by this method
   * @throws IOException propagated from bitstream creation
   * @throws SQLException propagated from the item and bitstream operations or the commit
   * @throws AuthorizeException propagated from the item and bitstream operations
   */
  protected int processUploadFile(
      Context context,
      HttpServletRequest request,
      HttpServletResponse response,
      SubmissionInfo subInfo)
      throws ServletException, IOException, SQLException, AuthorizeException {
    final String pathSuffix = "-path";

    // NOTE: File should already be uploaded.
    // Manakin does this automatically via Cocoon.
    // For JSP-UI, the SubmissionController.uploadFiles() does the actual upload

    Enumeration<?> attNames = request.getAttributeNames();

    // loop through our request attributes
    while (attNames.hasMoreElements()) {
      String attr = (String) attNames.nextElement();

      // only attributes ending with "-path" represent a newly uploaded file
      if (!attr.endsWith(pathSuffix)) {
        continue;
      }

      // Strip only the trailing "-path" to get the parameter the file was uploaded as.
      // (Removing every "-path" occurrence would corrupt names such as "a-path-b-path".)
      String param = attr.substring(0, attr.length() - pathSuffix.length());

      // Load the file's path and input stream and description
      String filePath = (String) request.getAttribute(attr);
      InputStream fileInputStream = (InputStream) request.getAttribute(param + "-inputstream");

      // attempt to get description from attribute first, then direct from a parameter
      String fileDescription = (String) request.getAttribute(param + "-description");
      if (fileDescription == null || fileDescription.length() == 0) {
        fileDescription = request.getParameter("description");
      }

      // if information wasn't passed by User Interface, we had a problem
      // with the upload
      if (filePath == null || fileInputStream == null) {
        return STATUS_UPLOAD_ERROR;
      }

      if (subInfo == null) {
        // In any event, if we don't have the submission info, the request
        // was malformed
        return STATUS_INTEGRITY_ERROR;
      }

      // Create the bitstream
      Item item = subInfo.getSubmissionItem().getItem();

      // do we already have a bundle?
      Bundle[] bundles = item.getBundles("ORIGINAL");

      Bitstream b;
      if (bundles.length < 1) {
        // set bundle's name to ORIGINAL
        b = item.createSingleBitstream(fileInputStream, "ORIGINAL");
      } else {
        // we have a bundle already, just add bitstream
        b = bundles[0].createBitstream(fileInputStream);
      }

      // Strip all but the last filename. It would be nice
      // to know which OS the file came from, so both separators are handled:
      // first everything up to the last '/', then everything up to the last '\'.
      String noPath = filePath.substring(filePath.lastIndexOf('/') + 1);
      noPath = noPath.substring(noPath.lastIndexOf('\\') + 1);

      b.setName(noPath);
      b.setSource(filePath);
      b.setDescription(fileDescription);

      // Identify the format
      BitstreamFormat bf = FormatIdentifier.guessFormat(context, b);
      b.setFormat(bf);

      // Update to DB
      b.update();
      item.update();

      if ((bf != null) && (bf.isInternal())) {
        log.warn("Attempt to upload file format marked as internal system use only");
        backoutBitstream(subInfo, b, item);
        return STATUS_UPLOAD_ERROR;
      }

      // Check for virus
      if (ConfigurationManager.getBooleanProperty("submission-curation", "virus-scan")) {
        Curator curator = new Curator();
        curator.addTask("vscan").curate(item);
        int status = curator.getStatus("vscan");
        if (status == Curator.CURATE_ERROR) {
          backoutBitstream(subInfo, b, item);
          return STATUS_VIRUS_CHECKER_UNAVAILABLE;
        } else if (status == Curator.CURATE_FAIL) {
          backoutBitstream(subInfo, b, item);
          return STATUS_CONTAINS_VIRUS;
        }
      }

      // If we got this far then everything is more or less ok.

      // Comment - not sure if this is the right place for a commit here
      // but I'm not brave enough to remove it - Robin.
      context.commit();

      // save this bitstream to the submission info, as the
      // bitstream we're currently working with
      subInfo.setBitstream(b);

      // if format was not identified
      if (bf == null) {
        return STATUS_UNKNOWN_FORMAT;
      }
    } // end while

    return STATUS_COMPLETE;
  }