/**
   * Attempt to filter a bitstream
   *
   * <p>An exception will be thrown if the media filter class cannot be instantiated, exceptions
   * from filtering will be logged to STDOUT and swallowed.
   *
   * @return true if bitstream processed, false if no applicable filter or already processed
   */
  public static boolean filterBitstream(Context c, Item myItem, Bitstream myBitstream)
      throws Exception {
    boolean filtered = false;

    // iterate through filter classes. A single format may be actioned
    // by more than one filter
    for (int i = 0; i < filterClasses.length; i++) {
      List fmts = (List) filterFormats.get(filterClasses[i].getClass().getName());
      if (fmts.contains(myBitstream.getFormat().getShortDescription())) {
        try {
          // only update item if bitstream not skipped
          if (filterClasses[i].processBitstream(c, myItem, myBitstream)) {
            myItem.update(); // Make sure new bitstream has a sequence
            // number
            filtered = true;
          }
        } catch (Exception e) {
          System.out.println(
              "ERROR filtering, skipping bitstream #" + myBitstream.getID() + " " + e);
          e.printStackTrace();
        }
      }
    }
    return filtered;
  }
Example #2
0
  /**
   * Remove a file from an item
   *
   * @param context current DSpace context
   * @param item Item where file should be removed from
   * @param bitstreamID The id of bitstream representing the file to remove
   * @return Status or error flag which will be processed by UI-related code! (if STATUS_COMPLETE or
   *     0 is returned, no errors occurred!)
   */
  protected int processRemoveFile(Context context, Item item, int bitstreamID)
      throws IOException, SQLException, AuthorizeException {
    Bitstream bitstream;

    // Try to find bitstream
    try {
      bitstream = Bitstream.find(context, bitstreamID);
    } catch (NumberFormatException nfe) {
      bitstream = null;
    }

    if (bitstream == null) {
      // Invalid or mangled bitstream ID
      // throw an error and return immediately
      return STATUS_INTEGRITY_ERROR;
    }

    // remove bitstream from bundle..
    // delete bundle if it's now empty
    Bundle[] bundles = bitstream.getBundles();

    bundles[0].removeBitstream(bitstream);

    Bitstream[] bitstreams = bundles[0].getBitstreams();

    // remove bundle if it's now empty
    if (bitstreams.length < 1) {
      item.removeBundle(bundles[0]);
      item.update();
    }

    // no errors occurred
    return STATUS_COMPLETE;
  }
  // Create workflow start provenance message
  private static void recordStart(Context c, Item myitem)
      throws SQLException, IOException, AuthorizeException {
    // Get non-internal format bitstreams
    Bitstream[] bitstreams = myitem.getNonInternalBitstreams();

    // get date
    DCDate now = DCDate.getCurrent();

    // Create provenance description
    String provmessage = "";

    if (myitem.getSubmitter() != null) {
      provmessage =
          "Submitted by "
              + myitem.getSubmitter().getFullName()
              + " ("
              + myitem.getSubmitter().getEmail()
              + ") on "
              + now.toString()
              + "\n";
    } else
    // null submitter
    {
      provmessage = "Submitted by unknown (probably automated) on" + now.toString() + "\n";
    }

    // add sizes and checksums of bitstreams
    provmessage += InstallItem.getBitstreamProvenanceMessage(myitem);

    // Add message to the DC
    myitem.addDC("description", "provenance", "en", provmessage);
    myitem.update();
  }
  /** Update the workflow item, including the unarchived item. */
  public void update() throws SQLException, IOException, AuthorizeException {
    // FIXME check auth
    log.info(
        LogManager.getHeader(ourContext, "update_workflow_item", "workflow_item_id=" + getID()));

    // Update the item
    item.update();

    // Update ourselves
    DatabaseManager.update(ourContext, wfRow);
  }
  /**
   * Look up the id of the template item for a given collection.
   *
   * @param context The current DSpace context.
   * @param collectionID The collection id.
   * @return The id of the template item.
   * @throws IOException
   */
  public static int getTemplateItemID(Context context, int collectionID)
      throws SQLException, AuthorizeException, IOException {
    Collection collection = Collection.find(context, collectionID);
    Item template = collection.getTemplateItem();

    if (template == null) {
      collection.createTemplateItem();
      template = collection.getTemplateItem();

      collection.update();
      template.update();
      context.commit();
    }

    return template.getID();
  }
  /**
   * rejects an item - rejection means undoing a submit - WorkspaceItem is created, and the
   * WorkflowItem is removed, user is emailed rejection_message.
   *
   * @param c Context
   * @param wi WorkflowItem to operate on
   * @param e EPerson doing the operation
   * @param rejection_message message to email to user
   */
  public static WorkspaceItem reject(
      Context c, WorkflowItem wi, EPerson e, String rejection_message)
      throws SQLException, AuthorizeException, IOException {
    // authorize a DSpaceActions.REJECT
    // stop workflow
    deleteTasks(c, wi);

    // rejection provenance
    Item myitem = wi.getItem();

    // Get current date
    String now = DCDate.getCurrent().toString();

    // Get user's name + email address
    String usersName = getEPersonName(e);

    // Here's what happened
    String provDescription =
        "Rejected by " + usersName + ", reason: " + rejection_message + " on " + now + " (GMT) ";

    // Add to item as a DC field
    myitem.addDC("description", "provenance", "en", provDescription);
    myitem.update();

    // convert into personal workspace
    WorkspaceItem wsi = returnToWorkspace(c, wi);

    // notify that it's been rejected
    notifyOfReject(c, wi, e, rejection_message);

    log.info(
        LogManager.getHeader(
            c,
            "reject_workflow",
            "workflow_item_id="
                + wi.getID()
                + "item_id="
                + wi.getItem().getID()
                + "collection_id="
                + wi.getCollection().getID()
                + "eperson_id="
                + e.getID()));

    return wsi;
  }
Example #7
0
  /*
   If we created a new Bitstream but now realised there is a problem then remove it.
  */
  private void backoutBitstream(SubmissionInfo subInfo, Bitstream b, Item item)
      throws SQLException, AuthorizeException, IOException {
    // remove bitstream from bundle..
    // delete bundle if it's now empty
    Bundle[] bnd = b.getBundles();

    bnd[0].removeBitstream(b);

    Bitstream[] bitstreams = bnd[0].getBitstreams();

    // remove bundle if it's now empty
    if (bitstreams.length < 1) {
      item.removeBundle(bnd[0]);
      item.update();
    }

    subInfo.setBitstream(null);
  }
  // Record approval provenance statement
  private static void recordApproval(Context c, WorkflowItem wi, EPerson e)
      throws SQLException, IOException, AuthorizeException {
    Item item = wi.getItem();

    // Get user's name + email address
    String usersName = getEPersonName(e);

    // Get current date
    String now = DCDate.getCurrent().toString();

    // Here's what happened
    String provDescription =
        "Approved for entry into archive by " + usersName + " on " + now + " (GMT) ";

    // add bitstream descriptions (name, size, checksums)
    provDescription += InstallItem.getBitstreamProvenanceMessage(item);

    // Add to item as a DC field
    item.addDC("description", "provenance", "en", provDescription);
    item.update();
  }
Example #9
0
  /**
   * Perform a deposit, using the supplied SWORD Deposit object.
   *
   * @param deposit
   * @throws SWORDErrorException
   * @throws DSpaceSWORDException
   */
  public DepositResult doDeposit(Deposit deposit) throws SWORDErrorException, DSpaceSWORDException {
    // get the things out of the service that we need
    Context context = swordService.getContext();
    SWORDConfiguration swordConfig = swordService.getSwordConfig();
    SWORDUrlManager urlManager = swordService.getUrlManager();

    // FIXME: the spec is unclear what to do in this situation.  I'm going
    // the throw a 415 (ERROR_CONTENT) until further notice
    //
    // determine if this is an acceptable file format
    if (!swordConfig.isAcceptableContentType(context, deposit.getContentType(), collection)) {
      log.error(
          "Unacceptable content type detected: "
              + deposit.getContentType()
              + " for collection "
              + collection.getID());
      throw new SWORDErrorException(
          ErrorCodes.ERROR_CONTENT,
          "Unacceptable content type in deposit request: " + deposit.getContentType());
    }

    // determine if this is an acceptable packaging type for the deposit
    // if not, we throw a 415 HTTP error (Unsupported Media Type, ERROR_CONTENT)
    if (!swordConfig.isSupportedMediaType(deposit.getPackaging(), this.collection)) {
      log.error(
          "Unacceptable packaging type detected: "
              + deposit.getPackaging()
              + "for collection"
              + collection.getID());
      throw new SWORDErrorException(
          ErrorCodes.ERROR_CONTENT,
          "Unacceptable packaging type in deposit request: " + deposit.getPackaging());
    }

    // Obtain the relevant ingester from the factory
    SWORDIngester si = SWORDIngesterFactory.getInstance(context, deposit, collection);
    swordService.message("Loaded ingester: " + si.getClass().getName());

    // do the deposit
    DepositResult result = si.ingest(swordService, deposit, collection);
    swordService.message("Archive ingest completed successfully");

    // if there's an item availalble, and we want to keep the original
    // then do that
    try {
      if (swordConfig.isKeepOriginal()) {
        swordService.message(
            "DSpace will store an original copy of the deposit, "
                + "as well as ingesting the item into the archive");

        // in order to be allowed to add the file back to the item, we need to ignore authorisations
        // for a moment
        boolean ignoreAuth = context.ignoreAuthorization();
        context.setIgnoreAuthorization(true);

        String bundleName = ConfigurationManager.getProperty("sword-server", "bundle.name");
        if (bundleName == null || "".equals(bundleName)) {
          bundleName = "SWORD";
        }
        Item item = result.getItem();
        Bundle[] bundles = item.getBundles(bundleName);
        Bundle swordBundle = null;
        if (bundles.length > 0) {
          swordBundle = bundles[0];
        }
        if (swordBundle == null) {
          swordBundle = item.createBundle(bundleName);
        }

        String fn = swordService.getFilename(context, deposit, true);

        Bitstream bitstream;
        FileInputStream fis = null;
        try {
          fis = new FileInputStream(deposit.getFile());
          bitstream = swordBundle.createBitstream(fis);
        } finally {
          if (fis != null) {
            fis.close();
          }
        }

        bitstream.setName(fn);
        bitstream.setDescription("SWORD deposit package");

        BitstreamFormat bf = BitstreamFormat.findByMIMEType(context, deposit.getContentType());
        if (bf != null) {
          bitstream.setFormat(bf);
        }

        bitstream.update();

        swordBundle.update();
        item.update();

        swordService.message(
            "Original package stored as " + fn + ", in item bundle " + swordBundle);

        // now reset the context ignore authorisation
        context.setIgnoreAuthorization(ignoreAuth);

        // set the media link for the created item
        result.setMediaLink(urlManager.getMediaLink(bitstream));
      } else {
        // set the vanilla media link, which doesn't resolve to anything
        result.setMediaLink(urlManager.getBaseMediaLinkUrl());
      }
    } catch (SQLException e) {
      log.error("caught exception: ", e);
      throw new DSpaceSWORDException(e);
    } catch (AuthorizeException e) {
      log.error("caught exception: ", e);
      throw new DSpaceSWORDException(e);
    } catch (FileNotFoundException e) {
      log.error("caught exception: ", e);
      throw new DSpaceSWORDException(e);
    } catch (IOException e) {
      log.error("caught exception: ", e);
      throw new DSpaceSWORDException(e);
    }

    return result;
  }
Example #10
0
  /**
   * perform the ingest using the given deposit object onto the specified target dspace object,
   * using the sword service implementation
   *
   * @param service
   * @param deposit
   * @param target
   * @return
   * @throws DSpaceSWORDException
   * @throws SWORDErrorException
   */
  public DepositResult ingest(SWORDService service, Deposit deposit, DSpaceObject target)
      throws DSpaceSWORDException, SWORDErrorException {
    try {
      if (!(target instanceof Item)) {
        throw new DSpaceSWORDException(
            "SimpleFileIngester can only be loaded for deposit onto DSpace Items");
      }
      Item item = (Item) target;

      // now set the sword service
      SWORDService swordService = service;

      // get the things out of the service that we need
      Context context = swordService.getContext();
      SWORDUrlManager urlManager = swordService.getUrlManager();

      Bundle[] bundles = item.getBundles("ORIGINAL");
      Bundle original;
      if (bundles.length > 0) {
        original = bundles[0];
      } else {
        original = item.createBundle("ORIGINAL");
      }

      Bitstream bs;
      FileInputStream fis = null;

      try {
        fis = new FileInputStream(deposit.getFile());
        bs = original.createBitstream(fis);
      } finally {
        if (fis != null) {
          fis.close();
        }
      }

      String fn = swordService.getFilename(context, deposit, false);
      bs.setName(fn);

      swordService.message("File created in item with filename " + fn);

      BitstreamFormat bf = BitstreamFormat.findByMIMEType(context, deposit.getContentType());
      if (bf != null) {
        bs.setFormat(bf);
      }

      // to do the updates, we need to ignore authorisation in the context
      boolean ignoreAuth = context.ignoreAuthorization();
      context.setIgnoreAuthorization(true);

      bs.update();
      original.update();
      item.update();

      // reset the ignore authorisation
      context.setIgnoreAuthorization(ignoreAuth);

      DepositResult result = new DepositResult();
      result.setHandle(urlManager.getBitstreamUrl(bs));
      result.setTreatment(this.getTreatment());
      result.setBitstream(bs);

      return result;
    } catch (SQLException e) {
      throw new DSpaceSWORDException(e);
    } catch (AuthorizeException e) {
      throw new DSpaceSWORDException(e);
    } catch (IOException e) {
      throw new DSpaceSWORDException(e);
    }
  }
Example #11
0
  private void processArchive(
      Context context,
      String sourceDirPath,
      String itemField,
      String metadataIndexName,
      boolean alterProvenance,
      boolean isTest)
      throws Exception {
    // open and process the source directory
    File sourceDir = new File(sourceDirPath);

    if ((sourceDir == null) || !sourceDir.exists() || !sourceDir.isDirectory()) {
      pr("Error, cannot open archive source directory " + sourceDirPath);
      throw new Exception("error with archive source directory " + sourceDirPath);
    }

    String[] dircontents = sourceDir.list(directoryFilter); // just the names, not the path
    Arrays.sort(dircontents);

    // Undo is suppressed to prevent undo of undo
    boolean suppressUndo = false;
    File fSuppressUndo = new File(sourceDir, SUPPRESS_UNDO_FILENAME);
    if (fSuppressUndo.exists()) {
      suppressUndo = true;
    }

    File undoDir = null; // sibling directory of source archive

    if (!suppressUndo && !isTest) {
      undoDir = initUndoArchive(sourceDir);
    }

    int itemCount = 0;
    int successItemCount = 0;

    for (String dirname : dircontents) {
      itemCount++;
      pr("");
      pr("processing item " + dirname);

      try {
        ItemArchive itarch = ItemArchive.create(context, new File(sourceDir, dirname), itemField);

        for (UpdateAction action : actionMgr) {
          pr("action: " + action.getClass().getName());
          action.execute(context, itarch, isTest, suppressUndo);
          if (!isTest && !suppressUndo) {
            itarch.writeUndo(undoDir);
          }
        }
        if (!isTest) {
          Item item = itarch.getItem();
          item.update(); // need to update before commit
          context.commit();
          item.decache();
        }
        ItemUpdate.pr("Item " + dirname + " completed");
        successItemCount++;
      } catch (Exception e) {
        pr("Exception processing item " + dirname + ": " + e.toString());
      }
    }

    if (!suppressUndo && !isTest) {
      StringBuilder sb = new StringBuilder("dsrun org.dspace.app.itemupdate.ItemUpdate ");
      sb.append(" -e ").append(this.eperson);
      sb.append(" -s ").append(undoDir);

      if (itemField != null) {
        sb.append(" -i ").append(itemField);
      }

      if (!alterProvenance) {
        sb.append(" -P ");
      }
      if (isTest) {
        sb.append(" -t ");
      }

      for (String actionOption : undoActionList) {
        sb.append(actionOption);
      }

      PrintWriter pw = null;
      try {
        File cmdFile = new File(undoDir.getParent(), undoDir.getName() + "_command.sh");
        pw = new PrintWriter(new BufferedWriter(new FileWriter(cmdFile)));
        pw.println(sb.toString());
      } finally {
        pw.close();
      }
    }

    pr("");
    pr(
        "Done processing.  Successful items: "
            + successItemCount
            + " of "
            + itemCount
            + " items in source archive");
    pr("");
  }
Example #12
0
  private void crosswalkPDF(Context context, Item item, InputStream metadata)
      throws CrosswalkException, IOException, SQLException, AuthorizeException {
    COSDocument cos = null;

    try {
      PDFParser parser = new PDFParser(metadata);
      parser.parse();
      cos = parser.getDocument();

      // sanity check: PDFBox breaks on encrypted documents, so give up.
      if (cos.getEncryptionDictionary() != null)
        throw new MetadataValidationException(
            "This packager cannot accept an encrypted PDF document.");

      /* PDF to DC "crosswalk":
       *
       * NOTE: This is not in a crosswalk plugin because (a) it isn't
       * useful anywhere else, and more importantly, (b) the source
       * data is not XML so it doesn't fit the plugin's interface.
       *
       * pattern of crosswalk -- PDF dict entries to DC:
       *   Title -> title.null
       *   Author -> contributor.author
       *   CreationDate -> date.created
       *   ModDate -> date.created
       *   Creator -> description.provenance (application that created orig)
       *   Producer -> description.provenance (convertor to pdf)
       *   Subject -> description.abstract
       *   Keywords -> subject.other
       *    date is java.util.Calendar
       */
      PDDocument pd = new PDDocument(cos);
      PDDocumentInformation docinfo = pd.getDocumentInformation();
      String title = docinfo.getTitle();

      // sanity check: item must have a title.
      if (title == null)
        throw new MetadataValidationException(
            "This PDF file is unacceptable, it does not have a value for \"Title\" in its Info dictionary.");
      log.debug("PDF Info dict title=\"" + title + "\"");
      item.addDC("title", null, "en", title);
      String value;
      Calendar date;
      if ((value = docinfo.getAuthor()) != null) {
        item.addDC("contributor", "author", null, value);
        log.debug("PDF Info dict author=\"" + value + "\"");
      }
      if ((value = docinfo.getCreator()) != null)
        item.addDC(
            "description",
            "provenance",
            "en",
            "Application that created the original document: " + value);
      if ((value = docinfo.getProducer()) != null)
        item.addDC(
            "description", "provenance", "en", "Original document converted to PDF by: " + value);
      if ((value = docinfo.getSubject()) != null)
        item.addDC("description", "abstract", null, value);
      if ((value = docinfo.getKeywords()) != null) item.addDC("subject", "other", null, value);

      // Take either CreationDate or ModDate as "date.created",
      // Too bad there's no place to put "last modified" in the DC.
      Calendar calValue;
      if ((calValue = docinfo.getCreationDate()) == null) calValue = docinfo.getModificationDate();
      if (calValue != null)
        item.addDC("date", "created", null, (new DCDate(calValue.getTime())).toString());
      item.update();
    } finally {
      if (cos != null) cos.close();
    }
  }
Example #13
0
  /**
   * Process the upload of a new file!
   *
   * @param context current DSpace context
   * @param request current servlet request object
   * @param response current servlet response object
   * @param subInfo submission info object
   * @return Status or error flag which will be processed by UI-related code! (if STATUS_COMPLETE or
   *     0 is returned, no errors occurred!)
   */
  protected int processUploadFile(
      Context context,
      HttpServletRequest request,
      HttpServletResponse response,
      SubmissionInfo subInfo)
      throws ServletException, IOException, SQLException, AuthorizeException {
    boolean formatKnown = true;
    boolean fileOK = false;
    BitstreamFormat bf = null;
    Bitstream b = null;

    // NOTE: File should already be uploaded.
    // Manakin does this automatically via Cocoon.
    // For JSP-UI, the SubmissionController.uploadFiles() does the actual upload

    Enumeration attNames = request.getAttributeNames();

    // loop through our request attributes
    while (attNames.hasMoreElements()) {
      String attr = (String) attNames.nextElement();

      // if this ends with "-path", this attribute
      // represents a newly uploaded file
      if (attr.endsWith("-path")) {
        // strip off the -path to get the actual parameter
        // that the file was uploaded as
        String param = attr.replace("-path", "");

        // Load the file's path and input stream and description
        String filePath = (String) request.getAttribute(param + "-path");
        InputStream fileInputStream = (InputStream) request.getAttribute(param + "-inputstream");

        // attempt to get description from attribute first, then direct from a parameter
        String fileDescription = (String) request.getAttribute(param + "-description");
        if (fileDescription == null || fileDescription.length() == 0) {
          fileDescription = request.getParameter("description");
        }

        // if information wasn't passed by User Interface, we had a problem
        // with the upload
        if (filePath == null || fileInputStream == null) {
          return STATUS_UPLOAD_ERROR;
        }

        if (subInfo == null) {
          // In any event, if we don't have the submission info, the request
          // was malformed
          return STATUS_INTEGRITY_ERROR;
        }

        // Create the bitstream
        Item item = subInfo.getSubmissionItem().getItem();

        // do we already have a bundle?
        Bundle[] bundles = item.getBundles("ORIGINAL");

        if (bundles.length < 1) {
          // set bundle's name to ORIGINAL
          b = item.createSingleBitstream(fileInputStream, "ORIGINAL");
        } else {
          // we have a bundle already, just add bitstream
          b = bundles[0].createBitstream(fileInputStream);
        }

        // Strip all but the last filename. It would be nice
        // to know which OS the file came from.
        String noPath = filePath;

        while (noPath.indexOf('/') > -1) {
          noPath = noPath.substring(noPath.indexOf('/') + 1);
        }

        while (noPath.indexOf('\\') > -1) {
          noPath = noPath.substring(noPath.indexOf('\\') + 1);
        }

        b.setName(noPath);
        b.setSource(filePath);
        b.setDescription(fileDescription);

        // Identify the format
        bf = FormatIdentifier.guessFormat(context, b);
        b.setFormat(bf);

        // Update to DB
        b.update();
        item.update();

        if ((bf != null) && (bf.isInternal())) {
          log.warn("Attempt to upload file format marked as internal system use only");
          backoutBitstream(subInfo, b, item);
          return STATUS_UPLOAD_ERROR;
        }

        // Check for virus
        if (ConfigurationManager.getBooleanProperty("submission-curation", "virus-scan")) {
          Curator curator = new Curator();
          curator.addTask("vscan").curate(item);
          int status = curator.getStatus("vscan");
          if (status == Curator.CURATE_ERROR) {
            backoutBitstream(subInfo, b, item);
            return STATUS_VIRUS_CHECKER_UNAVAILABLE;
          } else if (status == Curator.CURATE_FAIL) {
            backoutBitstream(subInfo, b, item);
            return STATUS_CONTAINS_VIRUS;
          }
        }

        // If we got this far then everything is more or less ok.

        // Comment - not sure if this is the right place for a commit here
        // but I'm not brave enough to remove it - Robin.
        context.commit();

        // save this bitstream to the submission info, as the
        // bitstream we're currently working with
        subInfo.setBitstream(b);

        // if format was not identified
        if (bf == null) {
          return STATUS_UNKNOWN_FORMAT;
        }
      } // end if attribute ends with "-path"
    } // end while

    return STATUS_COMPLETE;
  }