/**
 * Delete the bitstreams listed in an item archive's delete_contents file.
 *
 * <p>Each listed bitstream id is looked up and the bitstream is removed from every bundle that
 * holds it. When this action's {@code alterProvenance} flag is set, a note naming the deleted
 * bitstream is appended to the {@code dc.description.provenance} metadata of the first item of
 * the bitstream's first bundle. In test mode the intended actions are only reported through
 * {@code ItemUpdate.pr}; nothing is modified.
 *
 * @param context DSpace context used to look up the bitstreams
 * @param itarch item archive whose directory is searched for the delete_contents file
 * @param isTest if true, only report what would be done
 * @param suppressUndo not consulted by this action
 * @throws IllegalArgumentException declared for callers; not raised directly by this method
 * @throws ParseException propagated from the helpers called here
 * @throws IOException propagated from the helpers called here
 * @throws AuthorizeException propagated from the DSpace calls made here
 * @throws SQLException only if raised outside the per-bitstream processing; database errors
 *     while handling a single bitstream id are reported and processing continues
 */
public void execute(Context context, ItemArchive itarch, boolean isTest, boolean suppressUndo)
    throws IllegalArgumentException, IOException, SQLException, AuthorizeException,
        ParseException {
  File f = new File(itarch.getDirectory(), ItemUpdate.DELETE_CONTENTS_FILE);
  if (!f.exists()) {
    ItemUpdate.pr(
        "Warning: Delete_contents file for item " + itarch.getDirectoryName() + " not found.");
    return;
  }

  List<Integer> list = MetadataUtilities.readDeleteContentsFile(f);
  if (list.isEmpty()) {
    ItemUpdate.pr("Warning: empty delete_contents file for item " + itarch.getDirectoryName());
    return;
  }

  for (int id : list) {
    try {
      Bitstream bs = Bitstream.find(context, id);
      if (bs == null) {
        ItemUpdate.pr("Bitstream not found by id: " + id);
        continue;
      }

      // Capture the bundles before removal; they are needed afterwards to find the owning item.
      Bundle[] bundles = bs.getBundles();
      for (Bundle b : bundles) {
        if (isTest) {
          ItemUpdate.pr("Delete bitstream with id = " + id);
        } else {
          b.removeBitstream(bs);
          ItemUpdate.pr("Deleted bitstream with id = " + id);
        }
      }

      if (alterProvenance) {
        // A bitstream without a bundle, or a bundle without an item, has no provenance to
        // update; report it instead of failing on an out-of-range array index.
        Item[] owners = (bundles.length > 0) ? bundles[0].getItems() : new Item[0];
        if (owners.length == 0) {
          ItemUpdate.pr(
              "Warning: no owning item found for bitstream id "
                  + id
                  + "; provenance not updated.");
          continue;
        }

        DtoMetadata dtom = DtoMetadata.create("dc.description.provenance", "en", "");
        String append =
            "Bitstream " + bs.getName() + " deleted on " + DCDate.getCurrent() + "; ";
        ItemUpdate.pr("Append provenance with: " + append);

        if (!isTest) {
          MetadataUtilities.appendMetadata(owners[0], dtom, false, append);
        }
      }
    } catch (SQLException e) {
      ItemUpdate.pr("Error finding bitstream from id: " + id + " : " + e.toString());
    }
  }
}
/**
 * Process an item, correcting its CC-License if one is encountered.
 *
 * <p>Looks for a {@code license_rdf} bitstream in the item's first {@code CC-LICENSE} bundle,
 * strips the first {@code <license} and {@code </license>} fragments from its text, runs the
 * result through the class's XSL {@code templates}, and stores the output as a new bitstream
 * that carries over the old one's name, description, format, source and user format
 * description. The old bitstream is then removed from the bundle.
 *
 * <p>Items without a {@code CC-LICENSE} bundle, or whose bundle has no {@code license_rdf}
 * bitstream, are left untouched.
 *
 * @param item the item to process
 * @throws SQLException propagated from the DSpace calls made here
 * @throws AuthorizeException propagated from the DSpace calls made here
 * @throws IOException propagated from reading or creating bitstreams
 * @throws RuntimeException wrapping a {@code TransformerException} if the transformation fails
 */
protected static void handleItem(Item item)
    throws SQLException, AuthorizeException, IOException {
  Bundle[] bundles = item.getBundles("CC-LICENSE");
  if (bundles == null || bundles.length == 0) {
    return;
  }

  Bundle bundle = bundles[0];
  Bitstream bitstream = bundle.getBitstreamByName("license_rdf");
  if (bitstream == null) {
    // The bundle exists but holds no license_rdf bitstream: nothing to correct.
    return;
  }

  // NOTE(review): both byte<->String conversions below use the platform default charset,
  // as before; confirm whether an explicit charset should be used for the RDF content.
  String license_rdf = new String(copy(bitstream));

  /* quickly fix xml by ripping out offensive parts */
  license_rdf = license_rdf.replaceFirst("<license", "");
  license_rdf = license_rdf.replaceFirst("</license>", "");

  StringWriter result = new StringWriter();
  try {
    templates
        .newTransformer()
        .transform(
            new StreamSource(new ByteArrayInputStream(license_rdf.getBytes())),
            new StreamResult(result));
  } catch (TransformerException e) {
    throw new RuntimeException(e.getMessage(), e);
  }

  Bitstream newBitstream =
      bundle.createBitstream(new ByteArrayInputStream(result.toString().getBytes()));

  // Carry the descriptive properties of the old bitstream over to its replacement.
  newBitstream.setName(bitstream.getName());
  newBitstream.setDescription(bitstream.getDescription());
  newBitstream.setFormat(bitstream.getFormat());
  newBitstream.setSource(bitstream.getSource());
  newBitstream.setUserFormatDescription(bitstream.getUserFormatDescription());
  newBitstream.update();

  bundle.removeBitstream(bitstream);
  bundle.update();
}
/** * Create a new DSpace item out of a METS content package. All contents are dictated by the METS * manifest. Package is a ZIP archive, all files relative to top level and the manifest (as per * spec) in mets.xml. * * @param context - DSpace context. * @param collection - collection under which to create new item. * @param pkg - input stream containing package to ingest. * @param license - may be null, which takes default license. * @return workspace item created by ingest. * @throws PackageValidationException if package is unacceptable or there is a fatal error turning * it into an Item. */ public WorkspaceItem ingest( Context context, Collection collection, InputStream pkg, PackageParameters params, String license) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException { BitstreamDAO bsDAO = BitstreamDAOFactory.getInstance(context); BitstreamFormatDAO bfDAO = BitstreamFormatDAOFactory.getInstance(context); BundleDAO bundleDAO = BundleDAOFactory.getInstance(context); WorkspaceItemDAO wsiDAO = WorkspaceItemDAOFactory.getInstance(context); ZipInputStream zip = new ZipInputStream(pkg); HashMap fileIdToBitstream = new HashMap(); WorkspaceItem wi = null; boolean success = false; HashSet packageFiles = new HashSet(); boolean validate = params.getBooleanProperty("validate", true); try { /* 1. Read all the files in the Zip into bitstreams first, * because we only get to take one pass through a Zip input * stream. Give them temporary bitstream names corresponding * to the same names they had in the Zip, since those MUST * match the URL references in <Flocat> and <mdRef> elements. 
*/ METSManifest manifest = null; wi = wsiDAO.create(collection, false); Item item = wi.getItem(); Bundle contentBundle = item.createBundle(Constants.CONTENT_BUNDLE_NAME); Bundle mdBundle = null; ZipEntry ze; while ((ze = zip.getNextEntry()) != null) { if (ze.isDirectory()) continue; Bitstream bs = null; String fname = ze.getName(); if (fname.equals(MANIFEST_FILE)) { if (preserveManifest) { mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME); bs = mdBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip)); bs.setName(fname); bs.setSource(fname); // Get magic bitstream format to identify manifest. BitstreamFormat manifestFormat = null; manifestFormat = PackageUtils.findOrCreateBitstreamFormat( context, MANIFEST_BITSTREAM_FORMAT, "application/xml", MANIFEST_BITSTREAM_FORMAT + " package manifest"); bs.setFormat(manifestFormat); manifest = METSManifest.create(bs.retrieve(), validate); } else { manifest = METSManifest.create(new PackageUtils.UnclosableInputStream(zip), validate); continue; } } else { bs = contentBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip)); bs.setSource(fname); bs.setName(fname); } packageFiles.add(fname); bs.setSource(fname); bsDAO.update(bs); } zip.close(); if (manifest == null) throw new PackageValidationException( "No METS Manifest found (filename=" + MANIFEST_FILE + "). Package is unacceptable."); // initial sanity checks on manifest (in subclass) checkManifest(manifest); /* 2. Grovel a file list out of METS Manifest and compare * it to the files in package, as an integrity test. */ List manifestContentFiles = manifest.getContentFiles(); // Compare manifest files with the ones found in package: // a. Start with content files (mentioned in <fileGrp>s) HashSet missingFiles = new HashSet(); for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); ) { // First locate corresponding Bitstream and make // map of Bitstream to <file> ID. 
Element mfile = (Element) mi.next(); String mfileId = mfile.getAttributeValue("ID"); if (mfileId == null) throw new PackageValidationException( "Invalid METS Manifest: file element without ID attribute."); String path = METSManifest.getFileName(mfile); Bitstream bs = contentBundle.getBitstreamByName(path); if (bs == null) { log.warn( "Cannot find bitstream for filename=\"" + path + "\", skipping it..may cause problems later."); missingFiles.add(path); } else { fileIdToBitstream.put(mfileId, bs); // Now that we're done using Name to match to <file>, // set default bitstream Name to last path element; // Zip entries all have '/' pathname separators // NOTE: set default here, hopefully crosswalk of // a bitstream techMD section will override it. String fname = bs.getName(); int lastSlash = fname.lastIndexOf('/'); if (lastSlash >= 0 && lastSlash + 1 < fname.length()) bs.setName(fname.substring(lastSlash + 1)); // Set Default bitstream format: // 1. attempt to guess from MIME type // 2. if that fails, guess from "name" extension. String mimeType = mfile.getAttributeValue("MIMETYPE"); BitstreamFormat bf = (mimeType == null) ? null : bfDAO.retrieveByMimeType(mimeType); if (bf == null) bf = FormatIdentifier.guessFormat(context, bs); bs.setFormat(bf); // if this bitstream belongs in another Bundle, move it: String bundleName = manifest.getBundleName(mfile); if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME)) { Bundle bn; Bundle bns[] = item.getBundles(bundleName); if (bns != null && bns.length > 0) bn = bns[0]; else bn = item.createBundle(bundleName); bn.addBitstream(bs); contentBundle.removeBitstream(bs); } // finally, build compare lists by deleting matches. if (packageFiles.contains(path)) packageFiles.remove(path); else missingFiles.add(path); } } // b. Process files mentioned in <mdRef>s - check and move // to METADATA bundle. 
for (Iterator mi = manifest.getMdFiles().iterator(); mi.hasNext(); ) { Element mdref = (Element) mi.next(); String path = METSManifest.getFileName(mdref); // finally, build compare lists by deleting matches. if (packageFiles.contains(path)) packageFiles.remove(path); else missingFiles.add(path); // if there is a bitstream with that name in Content, move // it to the Metadata bundle: Bitstream mdbs = contentBundle.getBitstreamByName(path); if (mdbs != null) { if (mdBundle == null) mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME); mdBundle.addBitstream(mdbs); contentBundle.removeBitstream(mdbs); } } // KLUDGE: make sure Manifest file doesn't get flagged as missing // or extra, since it won't be mentioned in the manifest. if (packageFiles.contains(MANIFEST_FILE)) packageFiles.remove(MANIFEST_FILE); // Give subclass a chance to refine the lists of in-package // and missing files, delete extraneous files, etc. checkPackageFiles(packageFiles, missingFiles, manifest); // Any discrepency in file lists is a fatal error: if (!(packageFiles.isEmpty() && missingFiles.isEmpty())) { StringBuffer msg = new StringBuffer("Package is unacceptable: contents do not match manifest."); if (!missingFiles.isEmpty()) { msg.append("\nPackage is missing these files listed in Manifest:"); for (Iterator mi = missingFiles.iterator(); mi.hasNext(); ) msg.append("\n\t" + (String) mi.next()); } if (!packageFiles.isEmpty()) { msg.append("\nPackage contains extra files NOT in manifest:"); for (Iterator mi = packageFiles.iterator(); mi.hasNext(); ) msg.append("\n\t" + (String) mi.next()); } throw new PackageValidationException(msg.toString()); } /* 3. crosswalk the metadata */ // get mdref'd streams from "callback" object. MdrefManager callback = new MdrefManager(mdBundle); chooseItemDmd(context, item, manifest, callback, manifest.getItemDmds()); // crosswalk content bitstreams too. 
for (Iterator ei = fileIdToBitstream.entrySet().iterator(); ei.hasNext(); ) { Map.Entry ee = (Map.Entry) ei.next(); manifest.crosswalkBitstream( context, (Bitstream) ee.getValue(), (String) ee.getKey(), callback); } // Take a second pass over files to correct names of derived files // (e.g. thumbnails, extracted text) to what DSpace expects: for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); ) { Element mfile = (Element) mi.next(); String bundleName = manifest.getBundleName(mfile); if (!bundleName.equals(Constants.CONTENT_BUNDLE_NAME)) { Element origFile = manifest.getOriginalFile(mfile); if (origFile != null) { String ofileId = origFile.getAttributeValue("ID"); Bitstream obs = (Bitstream) fileIdToBitstream.get(ofileId); String newName = makeDerivedFilename(bundleName, obs.getName()); if (newName != null) { String mfileId = mfile.getAttributeValue("ID"); Bitstream bs = (Bitstream) fileIdToBitstream.get(mfileId); bs.setName(newName); bsDAO.update(bs); } } } } // Sanity-check the resulting metadata on the Item: PackageUtils.checkMetadata(item); /* 4. Set primary bitstream; same Bundle */ Element pbsFile = manifest.getPrimaryBitstream(); if (pbsFile != null) { Bitstream pbs = (Bitstream) fileIdToBitstream.get(pbsFile.getAttributeValue("ID")); if (pbs == null) log.error( "Got Primary Bitstream file ID=" + pbsFile.getAttributeValue("ID") + ", but found no corresponding bitstream."); else { List<Bundle> bn = bundleDAO.getBundles(pbs); if (bn.size() > 0) bn.get(0).setPrimaryBitstreamID(pbs.getID()); else log.error("Sanity check, got primary bitstream without any parent bundle."); } } // have subclass manage license since it may be extra package file. 
addLicense(context, collection, item, manifest, callback, license); // subclass hook for final checks and rearrangements finishItem(context, item); // commit any changes to bundles Bundle allBn[] = item.getBundles(); for (int i = 0; i < allBn.length; ++i) { bundleDAO.update(allBn[i]); } wsiDAO.update(wi); success = true; log.info( LogManager.getHeader( context, "ingest", "Created new Item, db ID=" + String.valueOf(item.getID()) + ", WorkspaceItem ID=" + String.valueOf(wi.getID()))); return wi; } catch (SQLException se) { // disable attempt to delete the workspace object, since // database may have suffered a fatal error and the // transaction rollback will get rid of it anyway. wi = null; // Pass this exception on to the next handler. throw se; } finally { // kill item (which also deletes bundles, bitstreams) if ingest fails if (!success && wi != null) wsiDAO.deleteAll(wi.getID()); } }
/** * Create new Item out of the ingested package, in the indicated collection. It creates a * workspace item, which the application can then install if it chooses to bypass Workflow. * * <p>This is a VERY crude import of a single Adobe PDF (Portable Document Format) file, using the * document's embedded metadata for package metadata. If the PDF file hasn't got the minimal * metadata available, it is rejected. * * <p> * * @param context DSpace context. * @param collection collection under which to create new item. * @param pkg input stream containing package to ingest. * @param params package parameters (none recognized) * @param license may be null, which takes default license. * @return workspace item created by ingest. * @throws PackageException if package is unacceptable or there is a fatal error turning it into * an Item. */ public WorkspaceItem ingest( Context context, Collection collection, InputStream pkg, PackageParameters params, String license) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException { InputStream bis = null; COSDocument cos = null; boolean success = false; Bundle original = null; Bitstream bs = null; WorkspaceItem wi = null; /** * XXX comment out for now // XXX for debugging of parameter handling if (params != null) { * Enumeration pe = params.propertyNames(); while (pe.hasMoreElements()) { String name = * (String)pe.nextElement(); String v[] = params.getProperties(name); StringBuffer msg = new * StringBuffer("PackageParam: "); msg.append(name).append(" = "); for (int i = 0; i < v.length; * ++i) { if (i > 0) msg.append(", "); msg.append(v[i]); } log.debug(msg); } } */ try { // Save the PDF in a bitstream first, since the parser // has to read it as well, and we cannot "rewind" it after that. 
wi = WorkspaceItem.create(context, collection, false); Item myitem = wi.getItem(); original = myitem.createBundle("ORIGINAL"); bs = original.createBitstream(pkg); pkg.close(); bs.setName("package.pdf"); setFormatToMIMEType(context, bs, "application/pdf"); bs.update(); log.debug("Created bitstream ID=" + String.valueOf(bs.getID()) + ", parsing..."); crosswalkPDF(context, myitem, bs.retrieve()); wi.update(); context.commit(); success = true; log.info( LogManager.getHeader( context, "ingest", "Created new Item, db ID=" + String.valueOf(myitem.getID()) + ", WorkspaceItem ID=" + String.valueOf(wi.getID()))); return wi; } finally { try { // Close bitstream input stream and PDF file. if (bis != null) bis.close(); if (cos != null) cos.close(); } catch (IOException ie) { } // get rid of bitstream and item if ingest fails if (!success) { if (original != null && bs != null) original.removeBitstream(bs); if (wi != null) wi.deleteAll(); } context.commit(); } }