/** * each entry represents a bitstream.... * * @param c * @param i * @param path * @param fileName * @param bundleName * @throws SQLException * @throws IOException * @throws AuthorizeException */ private void processContentFileEntry( Context c, Item i, String path, String fileName, String bundleName, boolean primary) throws SQLException, IOException, AuthorizeException { String fullpath = path + File.separatorChar + fileName; // get an input stream BufferedInputStream bis = new BufferedInputStream(new FileInputStream(fullpath)); Bitstream bs = null; String newBundleName = bundleName; if (bundleName == null) { // is it license.txt? if ("license.txt".equals(fileName)) { newBundleName = "LICENSE"; } else { // call it ORIGINAL newBundleName = "ORIGINAL"; } } if (!isTest) { // find the bundle Bundle[] bundles = i.getBundles(newBundleName); Bundle targetBundle = null; if (bundles.length < 1) { // not found, create a new one targetBundle = i.createBundle(newBundleName); } else { // put bitstreams into first bundle targetBundle = bundles[0]; } // now add the bitstream bs = targetBundle.createBitstream(bis); bs.setName(fileName); // Identify the format // FIXME - guessing format guesses license.txt incorrectly as a text // file format! BitstreamFormat bf = FormatIdentifier.guessFormat(c, bs); bs.setFormat(bf); // Is this a the primary bitstream? if (primary) { targetBundle.setPrimaryBitstreamID(bs.getID()); targetBundle.update(); } bs.update(); } bis.close(); }
/** * Register the bitstream file into DSpace * * @param c * @param i * @param assetstore * @param bitstreamPath the full filepath expressed in the contents file * @param bundleName * @throws SQLException * @throws IOException * @throws AuthorizeException */ private void registerBitstream( Context c, Item i, int assetstore, String bitstreamPath, String bundleName) throws SQLException, IOException, AuthorizeException { // TODO validate assetstore number // TODO make sure the bitstream is there Bitstream bs = null; String newBundleName = bundleName; if (bundleName == null) { // is it license.txt? if (bitstreamPath.endsWith("license.txt")) { newBundleName = "LICENSE"; } else { // call it ORIGINAL newBundleName = "ORIGINAL"; } } if (!isTest) { // find the bundle Bundle[] bundles = i.getBundles(newBundleName); Bundle targetBundle = null; if (bundles.length < 1) { // not found, create a new one targetBundle = i.createBundle(newBundleName); } else { // put bitstreams into first bundle targetBundle = bundles[0]; } // now add the bitstream bs = targetBundle.registerBitstream(assetstore, bitstreamPath); // set the name to just the filename int iLastSlash = bitstreamPath.lastIndexOf('/'); bs.setName(bitstreamPath.substring(iLastSlash + 1)); // Identify the format // FIXME - guessing format guesses license.txt incorrectly as a text file format! BitstreamFormat bf = FormatIdentifier.guessFormat(c, bs); bs.setFormat(bf); bs.update(); } }
/** * Create a new DSpace item out of a METS content package. All contents are dictated by the METS * manifest. Package is a ZIP archive, all files relative to top level and the manifest (as per * spec) in mets.xml. * * @param context - DSpace context. * @param collection - collection under which to create new item. * @param pkg - input stream containing package to ingest. * @param license - may be null, which takes default license. * @return workspace item created by ingest. * @throws PackageValidationException if package is unacceptable or there is a fatal error turning * it into an Item. */ public WorkspaceItem ingest( Context context, Collection collection, InputStream pkg, PackageParameters params, String license) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException { BitstreamDAO bsDAO = BitstreamDAOFactory.getInstance(context); BitstreamFormatDAO bfDAO = BitstreamFormatDAOFactory.getInstance(context); BundleDAO bundleDAO = BundleDAOFactory.getInstance(context); WorkspaceItemDAO wsiDAO = WorkspaceItemDAOFactory.getInstance(context); ZipInputStream zip = new ZipInputStream(pkg); HashMap fileIdToBitstream = new HashMap(); WorkspaceItem wi = null; boolean success = false; HashSet packageFiles = new HashSet(); boolean validate = params.getBooleanProperty("validate", true); try { /* 1. Read all the files in the Zip into bitstreams first, * because we only get to take one pass through a Zip input * stream. Give them temporary bitstream names corresponding * to the same names they had in the Zip, since those MUST * match the URL references in <Flocat> and <mdRef> elements. */ METSManifest manifest = null; wi = wsiDAO.create(collection, false); Item item = wi.getItem(); Bundle contentBundle = item.createBundle(Constants.CONTENT_BUNDLE_NAME); Bundle mdBundle = null; ZipEntry ze; while ((ze = zip.getNextEntry()) != null) { if (ze.isDirectory()) continue; Bitstream bs = null; String fname = ze.getName(); if (fname.equals(MANIFEST_FILE)) { if (preserveManifest) { mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME); bs = mdBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip)); bs.setName(fname); bs.setSource(fname); // Get magic bitstream format to identify manifest. BitstreamFormat manifestFormat = null; manifestFormat = PackageUtils.findOrCreateBitstreamFormat( context, MANIFEST_BITSTREAM_FORMAT, "application/xml", MANIFEST_BITSTREAM_FORMAT + " package manifest"); bs.setFormat(manifestFormat); manifest = METSManifest.create(bs.retrieve(), validate); } else { manifest = METSManifest.create(new PackageUtils.UnclosableInputStream(zip), validate); continue; } } else { bs = contentBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip)); bs.setSource(fname); bs.setName(fname); } packageFiles.add(fname); bs.setSource(fname); bsDAO.update(bs); } zip.close(); if (manifest == null) throw new PackageValidationException( "No METS Manifest found (filename=" + MANIFEST_FILE + "). Package is unacceptable."); // initial sanity checks on manifest (in subclass) checkManifest(manifest); /* 2. Grovel a file list out of METS Manifest and compare * it to the files in package, as an integrity test. */ List manifestContentFiles = manifest.getContentFiles(); // Compare manifest files with the ones found in package: // a. Start with content files (mentioned in <fileGrp>s) HashSet missingFiles = new HashSet(); for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); ) { // First locate corresponding Bitstream and make // map of Bitstream to <file> ID. Element mfile = (Element) mi.next(); String mfileId = mfile.getAttributeValue("ID"); if (mfileId == null) throw new PackageValidationException( "Invalid METS Manifest: file element without ID attribute."); String path = METSManifest.getFileName(mfile); Bitstream bs = contentBundle.getBitstreamByName(path); if (bs == null) { log.warn( "Cannot find bitstream for filename=\"" + path + "\", skipping it..may cause problems later."); missingFiles.add(path); } else { fileIdToBitstream.put(mfileId, bs); // Now that we're done using Name to match to <file>, // set default bitstream Name to last path element; // Zip entries all have '/' pathname separators // NOTE: set default here, hopefully crosswalk of // a bitstream techMD section will override it. String fname = bs.getName(); int lastSlash = fname.lastIndexOf('/'); if (lastSlash >= 0 && lastSlash + 1 < fname.length()) bs.setName(fname.substring(lastSlash + 1)); // Set Default bitstream format: // 1. attempt to guess from MIME type // 2. if that fails, guess from "name" extension. String mimeType = mfile.getAttributeValue("MIMETYPE"); BitstreamFormat bf = (mimeType == null) ? null : bfDAO.retrieveByMimeType(mimeType); if (bf == null) bf = FormatIdentifier.guessFormat(context, bs); bs.setFormat(bf); // if this bitstream belongs in another Bundle, move it: String bundleName = manifest.getBundleName(mfile); if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME)) { Bundle bn; Bundle bns[] = item.getBundles(bundleName); if (bns != null && bns.length > 0) bn = bns[0]; else bn = item.createBundle(bundleName); bn.addBitstream(bs); contentBundle.removeBitstream(bs); } // finally, build compare lists by deleting matches. if (packageFiles.contains(path)) packageFiles.remove(path); else missingFiles.add(path); } } // b. Process files mentioned in <mdRef>s - check and move // to METADATA bundle. for (Iterator mi = manifest.getMdFiles().iterator(); mi.hasNext(); ) { Element mdref = (Element) mi.next(); String path = METSManifest.getFileName(mdref); // finally, build compare lists by deleting matches. if (packageFiles.contains(path)) packageFiles.remove(path); else missingFiles.add(path); // if there is a bitstream with that name in Content, move // it to the Metadata bundle: Bitstream mdbs = contentBundle.getBitstreamByName(path); if (mdbs != null) { if (mdBundle == null) mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME); mdBundle.addBitstream(mdbs); contentBundle.removeBitstream(mdbs); } } // KLUDGE: make sure Manifest file doesn't get flagged as missing // or extra, since it won't be mentioned in the manifest. if (packageFiles.contains(MANIFEST_FILE)) packageFiles.remove(MANIFEST_FILE); // Give subclass a chance to refine the lists of in-package // and missing files, delete extraneous files, etc. checkPackageFiles(packageFiles, missingFiles, manifest); // Any discrepency in file lists is a fatal error: if (!(packageFiles.isEmpty() && missingFiles.isEmpty())) { StringBuffer msg = new StringBuffer("Package is unacceptable: contents do not match manifest."); if (!missingFiles.isEmpty()) { msg.append("\nPackage is missing these files listed in Manifest:"); for (Iterator mi = missingFiles.iterator(); mi.hasNext(); ) msg.append("\n\t" + (String) mi.next()); } if (!packageFiles.isEmpty()) { msg.append("\nPackage contains extra files NOT in manifest:"); for (Iterator mi = packageFiles.iterator(); mi.hasNext(); ) msg.append("\n\t" + (String) mi.next()); } throw new PackageValidationException(msg.toString()); } /* 3. crosswalk the metadata */ // get mdref'd streams from "callback" object. MdrefManager callback = new MdrefManager(mdBundle); chooseItemDmd(context, item, manifest, callback, manifest.getItemDmds()); // crosswalk content bitstreams too. for (Iterator ei = fileIdToBitstream.entrySet().iterator(); ei.hasNext(); ) { Map.Entry ee = (Map.Entry) ei.next(); manifest.crosswalkBitstream( context, (Bitstream) ee.getValue(), (String) ee.getKey(), callback); } // Take a second pass over files to correct names of derived files // (e.g. thumbnails, extracted text) to what DSpace expects: for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); ) { Element mfile = (Element) mi.next(); String bundleName = manifest.getBundleName(mfile); if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME)) { Element origFile = manifest.getOriginalFile(mfile); if (origFile != null) { String ofileId = origFile.getAttributeValue("ID"); Bitstream obs = (Bitstream) fileIdToBitstream.get(ofileId); String newName = makeDerivedFilename(bundleName, obs.getName()); if (newName != null) { String mfileId = mfile.getAttributeValue("ID"); Bitstream bs = (Bitstream) fileIdToBitstream.get(mfileId); bs.setName(newName); bsDAO.update(bs); } } } } // Sanity-check the resulting metadata on the Item: PackageUtils.checkMetadata(item); /* 4. Set primary bitstream; same Bundle */ Element pbsFile = manifest.getPrimaryBitstream(); if (pbsFile != null) { Bitstream pbs = (Bitstream) fileIdToBitstream.get(pbsFile.getAttributeValue("ID")); if (pbs == null) log.error( "Got Primary Bitstream file ID=" + pbsFile.getAttributeValue("ID") + ", but found no corresponding bitstream."); else { List<Bundle> bn = bundleDAO.getBundles(pbs); if (bn.size() > 0) bn.get(0).setPrimaryBitstreamID(pbs.getID()); else log.error("Sanity check, got primary bitstream without any parent bundle."); } } // have subclass manage license since it may be extra package file. addLicense(context, collection, item, manifest, callback, license); // subclass hook for final checks and rearrangements finishItem(context, item); // commit any changes to bundles Bundle allBn[] = item.getBundles(); for (int i = 0; i < allBn.length; ++i) { bundleDAO.update(allBn[i]); } wsiDAO.update(wi); success = true; log.info( LogManager.getHeader( context, "ingest", "Created new Item, db ID=" + String.valueOf(item.getID()) + ", WorkspaceItem ID=" + String.valueOf(wi.getID()))); return wi; } catch (SQLException se) { // disable attempt to delete the workspace object, since // database may have suffered a fatal error and the // transaction rollback will get rid of it anyway. wi = null; // Pass this exception on to the next handler. throw se; } finally { // kill item (which also deletes bundles, bitstreams) if ingest fails if (!success && wi != null) wsiDAO.deleteAll(wi.getID()); } }
/** * Process the upload of a new file! * * @param context current DSpace context * @param request current servlet request object * @param response current servlet response object * @param subInfo submission info object * @return Status or error flag which will be processed by UI-related code! (if STATUS_COMPLETE or * 0 is returned, no errors occurred!) */ protected int processUploadFile( Context context, HttpServletRequest request, HttpServletResponse response, SubmissionInfo subInfo) throws ServletException, IOException, SQLException, AuthorizeException { boolean formatKnown = true; boolean fileOK = false; BitstreamFormat bf = null; Bitstream b = null; // NOTE: File should already be uploaded. // Manakin does this automatically via Cocoon. // For JSP-UI, the SubmissionController.uploadFiles() does the actual upload Enumeration attNames = request.getAttributeNames(); // loop through our request attributes while (attNames.hasMoreElements()) { String attr = (String) attNames.nextElement(); // if this ends with "-path", this attribute // represents a newly uploaded file if (attr.endsWith("-path")) { // strip off the -path to get the actual parameter // that the file was uploaded as String param = attr.replace("-path", ""); // Load the file's path and input stream and description String filePath = (String) request.getAttribute(param + "-path"); InputStream fileInputStream = (InputStream) request.getAttribute(param + "-inputstream"); // attempt to get description from attribute first, then direct from a parameter String fileDescription = (String) request.getAttribute(param + "-description"); if (fileDescription == null || fileDescription.length() == 0) { fileDescription = request.getParameter("description"); } // if information wasn't passed by User Interface, we had a problem // with the upload if (filePath == null || fileInputStream == null) { return STATUS_UPLOAD_ERROR; } if (subInfo == null) { // In any event, if we don't have the submission info, the request // was malformed return STATUS_INTEGRITY_ERROR; } // Create the bitstream Item item = subInfo.getSubmissionItem().getItem(); // do we already have a bundle? Bundle[] bundles = item.getBundles("ORIGINAL"); if (bundles.length < 1) { // set bundle's name to ORIGINAL b = item.createSingleBitstream(fileInputStream, "ORIGINAL"); } else { // we have a bundle already, just add bitstream b = bundles[0].createBitstream(fileInputStream); } // Strip all but the last filename. It would be nice // to know which OS the file came from. String noPath = filePath; while (noPath.indexOf('/') > -1) { noPath = noPath.substring(noPath.indexOf('/') + 1); } while (noPath.indexOf('\\') > -1) { noPath = noPath.substring(noPath.indexOf('\\') + 1); } b.setName(noPath); b.setSource(filePath); b.setDescription(fileDescription); // Identify the format bf = FormatIdentifier.guessFormat(context, b); b.setFormat(bf); // Update to DB b.update(); item.update(); if ((bf != null) && (bf.isInternal())) { log.warn("Attempt to upload file format marked as internal system use only"); backoutBitstream(subInfo, b, item); return STATUS_UPLOAD_ERROR; } // Check for virus if (ConfigurationManager.getBooleanProperty("submission-curation", "virus-scan")) { Curator curator = new Curator(); curator.addTask("vscan").curate(item); int status = curator.getStatus("vscan"); if (status == Curator.CURATE_ERROR) { backoutBitstream(subInfo, b, item); return STATUS_VIRUS_CHECKER_UNAVAILABLE; } else if (status == Curator.CURATE_FAIL) { backoutBitstream(subInfo, b, item); return STATUS_CONTAINS_VIRUS; } } // If we got this far then everything is more or less ok. // Comment - not sure if this is the right place for a commit here // but I'm not brave enough to remove it - Robin. context.commit(); // save this bitstream to the submission info, as the // bitstream we're currently working with subInfo.setBitstream(b); // if format was not identified if (bf == null) { return STATUS_UNKNOWN_FORMAT; } } // end if attribute ends with "-path" } // end while return STATUS_COMPLETE; }