/** * Attempt to filter a bitstream * * <p>An exception will be thrown if the media filter class cannot be instantiated, exceptions * from filtering will be logged to STDOUT and swallowed. * * @return true if bitstream processed, false if no applicable filter or already processed */ public static boolean filterBitstream(Context c, Item myItem, Bitstream myBitstream) throws Exception { boolean filtered = false; // iterate through filter classes. A single format may be actioned // by more than one filter for (int i = 0; i < filterClasses.length; i++) { List fmts = (List) filterFormats.get(filterClasses[i].getClass().getName()); if (fmts.contains(myBitstream.getFormat().getShortDescription())) { try { // only update item if bitstream not skipped if (filterClasses[i].processBitstream(c, myItem, myBitstream)) { myItem.update(); // Make sure new bitstream has a sequence // number filtered = true; } } catch (Exception e) { System.out.println( "ERROR filtering, skipping bitstream #" + myBitstream.getID() + " " + e); e.printStackTrace(); } } } return filtered; }
/** * Remove a file from an item * * @param context current DSpace context * @param item Item where file should be removed from * @param bitstreamID The id of bitstream representing the file to remove * @return Status or error flag which will be processed by UI-related code! (if STATUS_COMPLETE or * 0 is returned, no errors occurred!) */ protected int processRemoveFile(Context context, Item item, int bitstreamID) throws IOException, SQLException, AuthorizeException { Bitstream bitstream; // Try to find bitstream try { bitstream = Bitstream.find(context, bitstreamID); } catch (NumberFormatException nfe) { bitstream = null; } if (bitstream == null) { // Invalid or mangled bitstream ID // throw an error and return immediately return STATUS_INTEGRITY_ERROR; } // remove bitstream from bundle.. // delete bundle if it's now empty Bundle[] bundles = bitstream.getBundles(); bundles[0].removeBitstream(bitstream); Bitstream[] bitstreams = bundles[0].getBitstreams(); // remove bundle if it's now empty if (bitstreams.length < 1) { item.removeBundle(bundles[0]); item.update(); } // no errors occurred return STATUS_COMPLETE; }
// Create workflow start provenance message private static void recordStart(Context c, Item myitem) throws SQLException, IOException, AuthorizeException { // Get non-internal format bitstreams Bitstream[] bitstreams = myitem.getNonInternalBitstreams(); // get date DCDate now = DCDate.getCurrent(); // Create provenance description String provmessage = ""; if (myitem.getSubmitter() != null) { provmessage = "Submitted by " + myitem.getSubmitter().getFullName() + " (" + myitem.getSubmitter().getEmail() + ") on " + now.toString() + "\n"; } else // null submitter { provmessage = "Submitted by unknown (probably automated) on" + now.toString() + "\n"; } // add sizes and checksums of bitstreams provmessage += InstallItem.getBitstreamProvenanceMessage(myitem); // Add message to the DC myitem.addDC("description", "provenance", "en", provmessage); myitem.update(); }
/** Update the workflow item, including the unarchived item. */ public void update() throws SQLException, IOException, AuthorizeException { // FIXME check auth log.info( LogManager.getHeader(ourContext, "update_workflow_item", "workflow_item_id=" + getID())); // Update the item item.update(); // Update ourselves DatabaseManager.update(ourContext, wfRow); }
/**
 * Returns the id of the template item of a collection, creating the template first when the
 * collection does not have one yet.
 *
 * <p>When a template has to be created, the collection and the new template are both updated
 * and the context is committed before the id is returned.
 *
 * @param context the current DSpace context
 * @param collectionID the collection id
 * @return the id of the (possibly newly created) template item
 * @throws SQLException as declared; propagated from the collection, item or context calls
 * @throws AuthorizeException as declared; propagated from the collection or item calls
 * @throws IOException as declared; propagated from the collection or item calls
 */
public static int getTemplateItemID(Context context, int collectionID)
        throws SQLException, AuthorizeException, IOException {
    Collection owner = Collection.find(context, collectionID);

    Item templateItem = owner.getTemplateItem();
    if (templateItem != null) {
        return templateItem.getID();
    }

    // No template yet: create one, persist both sides and commit.
    owner.createTemplateItem();
    templateItem = owner.getTemplateItem();
    owner.update();
    templateItem.update();
    context.commit();

    return templateItem.getID();
}
/** * rejects an item - rejection means undoing a submit - WorkspaceItem is created, and the * WorkflowItem is removed, user is emailed rejection_message. * * @param c Context * @param wi WorkflowItem to operate on * @param e EPerson doing the operation * @param rejection_message message to email to user */ public static WorkspaceItem reject( Context c, WorkflowItem wi, EPerson e, String rejection_message) throws SQLException, AuthorizeException, IOException { // authorize a DSpaceActions.REJECT // stop workflow deleteTasks(c, wi); // rejection provenance Item myitem = wi.getItem(); // Get current date String now = DCDate.getCurrent().toString(); // Get user's name + email address String usersName = getEPersonName(e); // Here's what happened String provDescription = "Rejected by " + usersName + ", reason: " + rejection_message + " on " + now + " (GMT) "; // Add to item as a DC field myitem.addDC("description", "provenance", "en", provDescription); myitem.update(); // convert into personal workspace WorkspaceItem wsi = returnToWorkspace(c, wi); // notify that it's been rejected notifyOfReject(c, wi, e, rejection_message); log.info( LogManager.getHeader( c, "reject_workflow", "workflow_item_id=" + wi.getID() + "item_id=" + wi.getItem().getID() + "collection_id=" + wi.getCollection().getID() + "eperson_id=" + e.getID())); return wsi; }
/* If we created a new Bitstream but now realised there is a problem then remove it. */ private void backoutBitstream(SubmissionInfo subInfo, Bitstream b, Item item) throws SQLException, AuthorizeException, IOException { // remove bitstream from bundle.. // delete bundle if it's now empty Bundle[] bnd = b.getBundles(); bnd[0].removeBitstream(b); Bitstream[] bitstreams = bnd[0].getBitstreams(); // remove bundle if it's now empty if (bitstreams.length < 1) { item.removeBundle(bnd[0]); item.update(); } subInfo.setBitstream(null); }
// Record approval provenance statement private static void recordApproval(Context c, WorkflowItem wi, EPerson e) throws SQLException, IOException, AuthorizeException { Item item = wi.getItem(); // Get user's name + email address String usersName = getEPersonName(e); // Get current date String now = DCDate.getCurrent().toString(); // Here's what happened String provDescription = "Approved for entry into archive by " + usersName + " on " + now + " (GMT) "; // add bitstream descriptions (name, size, checksums) provDescription += InstallItem.getBitstreamProvenanceMessage(item); // Add to item as a DC field item.addDC("description", "provenance", "en", provDescription); item.update(); }
/** * Perform a deposit, using the supplied SWORD Deposit object. * * @param deposit * @throws SWORDErrorException * @throws DSpaceSWORDException */ public DepositResult doDeposit(Deposit deposit) throws SWORDErrorException, DSpaceSWORDException { // get the things out of the service that we need Context context = swordService.getContext(); SWORDConfiguration swordConfig = swordService.getSwordConfig(); SWORDUrlManager urlManager = swordService.getUrlManager(); // FIXME: the spec is unclear what to do in this situation. I'm going // the throw a 415 (ERROR_CONTENT) until further notice // // determine if this is an acceptable file format if (!swordConfig.isAcceptableContentType(context, deposit.getContentType(), collection)) { log.error( "Unacceptable content type detected: " + deposit.getContentType() + " for collection " + collection.getID()); throw new SWORDErrorException( ErrorCodes.ERROR_CONTENT, "Unacceptable content type in deposit request: " + deposit.getContentType()); } // determine if this is an acceptable packaging type for the deposit // if not, we throw a 415 HTTP error (Unsupported Media Type, ERROR_CONTENT) if (!swordConfig.isSupportedMediaType(deposit.getPackaging(), this.collection)) { log.error( "Unacceptable packaging type detected: " + deposit.getPackaging() + "for collection" + collection.getID()); throw new SWORDErrorException( ErrorCodes.ERROR_CONTENT, "Unacceptable packaging type in deposit request: " + deposit.getPackaging()); } // Obtain the relevant ingester from the factory SWORDIngester si = SWORDIngesterFactory.getInstance(context, deposit, collection); swordService.message("Loaded ingester: " + si.getClass().getName()); // do the deposit DepositResult result = si.ingest(swordService, deposit, collection); swordService.message("Archive ingest completed successfully"); // if there's an item availalble, and we want to keep the original // then do that try { if (swordConfig.isKeepOriginal()) { swordService.message( "DSpace will store an 
original copy of the deposit, " + "as well as ingesting the item into the archive"); // in order to be allowed to add the file back to the item, we need to ignore authorisations // for a moment boolean ignoreAuth = context.ignoreAuthorization(); context.setIgnoreAuthorization(true); String bundleName = ConfigurationManager.getProperty("sword-server", "bundle.name"); if (bundleName == null || "".equals(bundleName)) { bundleName = "SWORD"; } Item item = result.getItem(); Bundle[] bundles = item.getBundles(bundleName); Bundle swordBundle = null; if (bundles.length > 0) { swordBundle = bundles[0]; } if (swordBundle == null) { swordBundle = item.createBundle(bundleName); } String fn = swordService.getFilename(context, deposit, true); Bitstream bitstream; FileInputStream fis = null; try { fis = new FileInputStream(deposit.getFile()); bitstream = swordBundle.createBitstream(fis); } finally { if (fis != null) { fis.close(); } } bitstream.setName(fn); bitstream.setDescription("SWORD deposit package"); BitstreamFormat bf = BitstreamFormat.findByMIMEType(context, deposit.getContentType()); if (bf != null) { bitstream.setFormat(bf); } bitstream.update(); swordBundle.update(); item.update(); swordService.message( "Original package stored as " + fn + ", in item bundle " + swordBundle); // now reset the context ignore authorisation context.setIgnoreAuthorization(ignoreAuth); // set the media link for the created item result.setMediaLink(urlManager.getMediaLink(bitstream)); } else { // set the vanilla media link, which doesn't resolve to anything result.setMediaLink(urlManager.getBaseMediaLinkUrl()); } } catch (SQLException e) { log.error("caught exception: ", e); throw new DSpaceSWORDException(e); } catch (AuthorizeException e) { log.error("caught exception: ", e); throw new DSpaceSWORDException(e); } catch (FileNotFoundException e) { log.error("caught exception: ", e); throw new DSpaceSWORDException(e); } catch (IOException e) { log.error("caught exception: ", e); throw new 
DSpaceSWORDException(e); } return result; }
/**
 * Performs the ingest of the given deposit onto the specified target DSpace object, using the
 * supplied SWORD service.
 *
 * <p>The target must be an {@code Item}. The deposited file is stored as a new bitstream in
 * the item's first "ORIGINAL" bundle (created when the item has none), named via
 * {@code SWORDService.getFilename}, and given the bitstream format matching the deposit's
 * content type when one is found. The bitstream, bundle and item are then updated with
 * authorisation checks temporarily disabled on the context.
 *
 * @param service SWORD service providing the context, URL manager and messaging
 * @param deposit deposit whose file and content type are ingested
 * @param target object to deposit onto; must be an {@code Item}
 * @return a result carrying the bitstream URL as its handle, this ingester's treatment text
 *     and the newly created bitstream
 * @throws DSpaceSWORDException if the target is not an {@code Item}, or wrapping any
 *     SQLException, AuthorizeException or IOException raised during the ingest
 * @throws SWORDErrorException as declared by the signature
 */
public DepositResult ingest(SWORDService service, Deposit deposit, DSpaceObject target)
        throws DSpaceSWORDException, SWORDErrorException {
    try {
        if (!(target instanceof Item)) {
            throw new DSpaceSWORDException(
                    "SimpleFileIngester can only be loaded for deposit onto DSpace Items");
        }
        Item item = (Item) target;

        // get the things out of the service that we need
        Context context = service.getContext();
        SWORDUrlManager urlManager = service.getUrlManager();

        Bundle[] bundles = item.getBundles("ORIGINAL");
        Bundle original;
        if (bundles.length > 0) {
            original = bundles[0];
        } else {
            original = item.createBundle("ORIGINAL");
        }

        Bitstream bs;
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(deposit.getFile());
            bs = original.createBitstream(fis);
        } finally {
            if (fis != null) {
                fis.close();
            }
        }

        String fn = service.getFilename(context, deposit, false);
        bs.setName(fn);

        service.message("File created in item with filename " + fn);

        BitstreamFormat bf = BitstreamFormat.findByMIMEType(context, deposit.getContentType());
        if (bf != null) {
            bs.setFormat(bf);
        }

        // to do the updates, we need to ignore authorisation in the context
        boolean ignoreAuth = context.ignoreAuthorization();
        context.setIgnoreAuthorization(true);
        try {
            bs.update();
            original.update();
            item.update();
        } finally {
            // Restore the caller's authorisation mode even when an update fails, so the
            // context is never left in ignore-authorisation mode after an error.
            context.setIgnoreAuthorization(ignoreAuth);
        }

        DepositResult result = new DepositResult();
        result.setHandle(urlManager.getBitstreamUrl(bs));
        result.setTreatment(this.getTreatment());
        result.setBitstream(bs);

        return result;
    } catch (SQLException e) {
        throw new DSpaceSWORDException(e);
    } catch (AuthorizeException e) {
        throw new DSpaceSWORDException(e);
    } catch (IOException e) {
        throw new DSpaceSWORDException(e);
    }
}
private void processArchive( Context context, String sourceDirPath, String itemField, String metadataIndexName, boolean alterProvenance, boolean isTest) throws Exception { // open and process the source directory File sourceDir = new File(sourceDirPath); if ((sourceDir == null) || !sourceDir.exists() || !sourceDir.isDirectory()) { pr("Error, cannot open archive source directory " + sourceDirPath); throw new Exception("error with archive source directory " + sourceDirPath); } String[] dircontents = sourceDir.list(directoryFilter); // just the names, not the path Arrays.sort(dircontents); // Undo is suppressed to prevent undo of undo boolean suppressUndo = false; File fSuppressUndo = new File(sourceDir, SUPPRESS_UNDO_FILENAME); if (fSuppressUndo.exists()) { suppressUndo = true; } File undoDir = null; // sibling directory of source archive if (!suppressUndo && !isTest) { undoDir = initUndoArchive(sourceDir); } int itemCount = 0; int successItemCount = 0; for (String dirname : dircontents) { itemCount++; pr(""); pr("processing item " + dirname); try { ItemArchive itarch = ItemArchive.create(context, new File(sourceDir, dirname), itemField); for (UpdateAction action : actionMgr) { pr("action: " + action.getClass().getName()); action.execute(context, itarch, isTest, suppressUndo); if (!isTest && !suppressUndo) { itarch.writeUndo(undoDir); } } if (!isTest) { Item item = itarch.getItem(); item.update(); // need to update before commit context.commit(); item.decache(); } ItemUpdate.pr("Item " + dirname + " completed"); successItemCount++; } catch (Exception e) { pr("Exception processing item " + dirname + ": " + e.toString()); } } if (!suppressUndo && !isTest) { StringBuilder sb = new StringBuilder("dsrun org.dspace.app.itemupdate.ItemUpdate "); sb.append(" -e ").append(this.eperson); sb.append(" -s ").append(undoDir); if (itemField != null) { sb.append(" -i ").append(itemField); } if (!alterProvenance) { sb.append(" -P "); } if (isTest) { sb.append(" -t "); } for (String 
actionOption : undoActionList) { sb.append(actionOption); } PrintWriter pw = null; try { File cmdFile = new File(undoDir.getParent(), undoDir.getName() + "_command.sh"); pw = new PrintWriter(new BufferedWriter(new FileWriter(cmdFile))); pw.println(sb.toString()); } finally { pw.close(); } } pr(""); pr( "Done processing. Successful items: " + successItemCount + " of " + itemCount + " items in source archive"); pr(""); }
private void crosswalkPDF(Context context, Item item, InputStream metadata) throws CrosswalkException, IOException, SQLException, AuthorizeException { COSDocument cos = null; try { PDFParser parser = new PDFParser(metadata); parser.parse(); cos = parser.getDocument(); // sanity check: PDFBox breaks on encrypted documents, so give up. if (cos.getEncryptionDictionary() != null) throw new MetadataValidationException( "This packager cannot accept an encrypted PDF document."); /* PDF to DC "crosswalk": * * NOTE: This is not in a crosswalk plugin because (a) it isn't * useful anywhere else, and more importantly, (b) the source * data is not XML so it doesn't fit the plugin's interface. * * pattern of crosswalk -- PDF dict entries to DC: * Title -> title.null * Author -> contributor.author * CreationDate -> date.created * ModDate -> date.created * Creator -> description.provenance (application that created orig) * Producer -> description.provenance (convertor to pdf) * Subject -> description.abstract * Keywords -> subject.other * date is java.util.Calendar */ PDDocument pd = new PDDocument(cos); PDDocumentInformation docinfo = pd.getDocumentInformation(); String title = docinfo.getTitle(); // sanity check: item must have a title. 
if (title == null) throw new MetadataValidationException( "This PDF file is unacceptable, it does not have a value for \"Title\" in its Info dictionary."); log.debug("PDF Info dict title=\"" + title + "\""); item.addDC("title", null, "en", title); String value; Calendar date; if ((value = docinfo.getAuthor()) != null) { item.addDC("contributor", "author", null, value); log.debug("PDF Info dict author=\"" + value + "\""); } if ((value = docinfo.getCreator()) != null) item.addDC( "description", "provenance", "en", "Application that created the original document: " + value); if ((value = docinfo.getProducer()) != null) item.addDC( "description", "provenance", "en", "Original document converted to PDF by: " + value); if ((value = docinfo.getSubject()) != null) item.addDC("description", "abstract", null, value); if ((value = docinfo.getKeywords()) != null) item.addDC("subject", "other", null, value); // Take either CreationDate or ModDate as "date.created", // Too bad there's no place to put "last modified" in the DC. Calendar calValue; if ((calValue = docinfo.getCreationDate()) == null) calValue = docinfo.getModificationDate(); if (calValue != null) item.addDC("date", "created", null, (new DCDate(calValue.getTime())).toString()); item.update(); } finally { if (cos != null) cos.close(); } }
/** * Process the upload of a new file! * * @param context current DSpace context * @param request current servlet request object * @param response current servlet response object * @param subInfo submission info object * @return Status or error flag which will be processed by UI-related code! (if STATUS_COMPLETE or * 0 is returned, no errors occurred!) */ protected int processUploadFile( Context context, HttpServletRequest request, HttpServletResponse response, SubmissionInfo subInfo) throws ServletException, IOException, SQLException, AuthorizeException { boolean formatKnown = true; boolean fileOK = false; BitstreamFormat bf = null; Bitstream b = null; // NOTE: File should already be uploaded. // Manakin does this automatically via Cocoon. // For JSP-UI, the SubmissionController.uploadFiles() does the actual upload Enumeration attNames = request.getAttributeNames(); // loop through our request attributes while (attNames.hasMoreElements()) { String attr = (String) attNames.nextElement(); // if this ends with "-path", this attribute // represents a newly uploaded file if (attr.endsWith("-path")) { // strip off the -path to get the actual parameter // that the file was uploaded as String param = attr.replace("-path", ""); // Load the file's path and input stream and description String filePath = (String) request.getAttribute(param + "-path"); InputStream fileInputStream = (InputStream) request.getAttribute(param + "-inputstream"); // attempt to get description from attribute first, then direct from a parameter String fileDescription = (String) request.getAttribute(param + "-description"); if (fileDescription == null || fileDescription.length() == 0) { fileDescription = request.getParameter("description"); } // if information wasn't passed by User Interface, we had a problem // with the upload if (filePath == null || fileInputStream == null) { return STATUS_UPLOAD_ERROR; } if (subInfo == null) { // In any event, if we don't have the submission info, the request // 
was malformed return STATUS_INTEGRITY_ERROR; } // Create the bitstream Item item = subInfo.getSubmissionItem().getItem(); // do we already have a bundle? Bundle[] bundles = item.getBundles("ORIGINAL"); if (bundles.length < 1) { // set bundle's name to ORIGINAL b = item.createSingleBitstream(fileInputStream, "ORIGINAL"); } else { // we have a bundle already, just add bitstream b = bundles[0].createBitstream(fileInputStream); } // Strip all but the last filename. It would be nice // to know which OS the file came from. String noPath = filePath; while (noPath.indexOf('/') > -1) { noPath = noPath.substring(noPath.indexOf('/') + 1); } while (noPath.indexOf('\\') > -1) { noPath = noPath.substring(noPath.indexOf('\\') + 1); } b.setName(noPath); b.setSource(filePath); b.setDescription(fileDescription); // Identify the format bf = FormatIdentifier.guessFormat(context, b); b.setFormat(bf); // Update to DB b.update(); item.update(); if ((bf != null) && (bf.isInternal())) { log.warn("Attempt to upload file format marked as internal system use only"); backoutBitstream(subInfo, b, item); return STATUS_UPLOAD_ERROR; } // Check for virus if (ConfigurationManager.getBooleanProperty("submission-curation", "virus-scan")) { Curator curator = new Curator(); curator.addTask("vscan").curate(item); int status = curator.getStatus("vscan"); if (status == Curator.CURATE_ERROR) { backoutBitstream(subInfo, b, item); return STATUS_VIRUS_CHECKER_UNAVAILABLE; } else if (status == Curator.CURATE_FAIL) { backoutBitstream(subInfo, b, item); return STATUS_CONTAINS_VIRUS; } } // If we got this far then everything is more or less ok. // Comment - not sure if this is the right place for a commit here // but I'm not brave enough to remove it - Robin. 
context.commit(); // save this bitstream to the submission info, as the // bitstream we're currently working with subInfo.setBitstream(b); // if format was not identified if (bf == null) { return STATUS_UNKNOWN_FORMAT; } } // end if attribute ends with "-path" } // end while return STATUS_COMPLETE; }