SevenZipExtractor( IngestJobContext context, FileTypeDetector fileTypeDetector, String moduleDirRelative, String moduleDirAbsolute) throws IngestModuleException { if (!SevenZip.isInitializedSuccessfully() && (SevenZip.getLastInitializationException() == null)) { try { SevenZip.initSevenZipFromPlatformJAR(); String platform = SevenZip.getUsedPlatform(); logger.log( Level.INFO, "7-Zip-JBinding library was initialized on supported platform: {0}", platform); // NON-NLS } catch (SevenZipNativeInitializationException e) { logger.log(Level.SEVERE, "Error initializing 7-Zip-JBinding library", e); // NON-NLS String msg = NbBundle.getMessage( this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.init.errInitModule.msg", EmbeddedFileExtractorModuleFactory.getModuleName()); String details = NbBundle.getMessage( this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.init.errCantInitLib", e.getMessage()); services.postMessage( IngestMessage.createErrorMessage( EmbeddedFileExtractorModuleFactory.getModuleName(), msg, details)); throw new IngestModuleException(e.getMessage()); } } this.context = context; this.fileTypeDetector = fileTypeDetector; this.moduleDirRelative = moduleDirRelative; this.moduleDirAbsolute = moduleDirAbsolute; this.archiveDepthCountTree = new ArchiveDepthCountTree(); }
/** * Unpack the file to local folder and return a list of derived files * * @param pipelineContext current ingest context * @param archiveFile file to unpack * @return list of unpacked derived files */ void unpack(AbstractFile archiveFile) { String archiveFilePath; try { archiveFilePath = archiveFile.getUniquePath(); } catch (TskCoreException ex) { archiveFilePath = archiveFile.getParentPath() + archiveFile.getName(); } // check if already has derived files, skip try { if (archiveFile.hasChildren()) { // check if local unpacked dir exists if (new File(EmbeddedFileExtractorIngestModule.getUniqueName(archiveFile)).exists()) { logger.log( Level.INFO, "File already has been processed as it has children and local unpacked file, skipping: {0}", archiveFilePath); // NON-NLS return; } } } catch (TskCoreException e) { logger.log( Level.INFO, "Error checking if file already has been processed, skipping: {0}", archiveFilePath); // NON-NLS return; } List<AbstractFile> unpackedFiles = Collections.<AbstractFile>emptyList(); // recursion depth check for zip bomb final long archiveId = archiveFile.getId(); SevenZipExtractor.ArchiveDepthCountTree.Archive parentAr = archiveDepthCountTree.findArchive(archiveId); if (parentAr == null) { parentAr = archiveDepthCountTree.addArchive(null, archiveId); } else if (parentAr.getDepth() == MAX_DEPTH) { String msg = NbBundle.getMessage( this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.warnMsg.zipBomb", archiveFile.getName()); String details = NbBundle.getMessage( this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.warnDetails.zipBomb", parentAr.getDepth(), archiveFilePath); // MessageNotifyUtil.Notify.error(msg, details); services.postMessage( IngestMessage.createWarningMessage( EmbeddedFileExtractorModuleFactory.getModuleName(), msg, details)); return; } boolean hasEncrypted = false; boolean fullEncryption = true; ISevenZipInArchive inArchive = null; SevenZipContentReadStream stream = null; final ProgressHandle progress = ProgressHandleFactory.createHandle( NbBundle.getMessage( this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.moduleName")); int processedItems = 0; boolean progressStarted = false; try { stream = new SevenZipContentReadStream(new ReadContentInputStream(archiveFile)); // for RAR files we need to open them explicitly as RAR. Otherwise, if there is a ZIP archive // inside RAR archive // it will be opened incorrectly when using 7zip's built-in auto-detect functionality. // All other archive formats are still opened using 7zip built-in auto-detect functionality. ArchiveFormat options = get7ZipOptions(archiveFile); inArchive = SevenZip.openInArchive(options, stream); int numItems = inArchive.getNumberOfItems(); logger.log( Level.INFO, "Count of items in archive: {0}: {1}", new Object[] {archiveFilePath, numItems}); // NON-NLS progress.start(numItems); progressStarted = true; final ISimpleInArchive simpleInArchive = inArchive.getSimpleInterface(); // setup the archive local root folder final String uniqueArchiveFileName = EmbeddedFileExtractorIngestModule.getUniqueName(archiveFile); final String localRootAbsPath = getLocalRootAbsPath(uniqueArchiveFileName); final File localRoot = new File(localRootAbsPath); if (!localRoot.exists()) { try { localRoot.mkdirs(); } catch (SecurityException e) { logger.log( Level.SEVERE, "Error setting up output path for archive root: {0}", localRootAbsPath); // NON-NLS // bail return; } } // initialize tree hierarchy to keep track of unpacked file structure SevenZipExtractor.UnpackedTree unpackedTree = new SevenZipExtractor.UnpackedTree( moduleDirRelative + "/" + uniqueArchiveFileName, archiveFile); long freeDiskSpace = services.getFreeDiskSpace(); // unpack and process every item in archive int itemNumber = 0; for (ISimpleInArchiveItem item : simpleInArchive.getArchiveItems()) { String pathInArchive = item.getPath(); if (pathInArchive == null || pathInArchive.isEmpty()) { // some formats (.tar.gz) may not be handled correctly -- file in archive has no name/path // handle this for .tar.gz and tgz but assuming the child is tar, // otherwise, unpack using itemNumber as name // TODO this should really be signature based, not extension based String archName = archiveFile.getName(); int dotI = archName.lastIndexOf("."); String useName = null; if (dotI != -1) { String base = archName.substring(0, dotI); String ext = archName.substring(dotI); switch (ext) { case ".gz": // NON-NLS useName = base; break; case ".tgz": // NON-NLS useName = base + ".tar"; // NON-NLS break; } } if (useName == null) { pathInArchive = "/" + archName + "/" + Integer.toString(itemNumber); } else { pathInArchive = "/" + useName; } String msg = NbBundle.getMessage( this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.unknownPath.msg", archiveFilePath, pathInArchive); logger.log(Level.WARNING, msg); } ++itemNumber; logger.log(Level.INFO, "Extracted item path: {0}", pathInArchive); // NON-NLS // check if possible zip bomb if (isZipBombArchiveItemCheck(archiveFile, item)) { continue; // skip the item } // find this node in the hierarchy, create if needed SevenZipExtractor.UnpackedTree.UnpackedNode unpackedNode = unpackedTree.addNode(pathInArchive); String fileName = unpackedNode.getFileName(); // update progress bar progress.progress(archiveFile.getName() + ": " + fileName, processedItems); final boolean isEncrypted = item.isEncrypted(); final boolean isDir = item.isFolder(); if (isEncrypted) { logger.log( Level.WARNING, "Skipping encrypted file in archive: {0}", pathInArchive); // NON-NLS hasEncrypted = true; continue; } else { fullEncryption = false; } final Long size = item.getSize(); if (size == null) { // If the size property cannot be determined, out-of-disk-space // situations cannot be ascertained. // Hence skip this file. logger.log( Level.WARNING, "Size cannot be determined. Skipping file in archive: {0}", pathInArchive); // NON-NLS continue; } // check if unpacking this file will result in out of disk space // this is additional to zip bomb prevention mechanism if (freeDiskSpace != IngestMonitor.DISK_FREE_SPACE_UNKNOWN && size > 0) { // if known free space and file not empty long newDiskSpace = freeDiskSpace - size; if (newDiskSpace < MIN_FREE_DISK_SPACE) { String msg = NbBundle.getMessage( this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.notEnoughDiskSpace.msg", archiveFilePath, fileName); String details = NbBundle.getMessage( this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.notEnoughDiskSpace.details"); // MessageNotifyUtil.Notify.error(msg, details); services.postMessage( IngestMessage.createErrorMessage( EmbeddedFileExtractorModuleFactory.getModuleName(), msg, details)); logger.log( Level.INFO, "Skipping archive item due to insufficient disk space: {0}, {1}", new Object[] {archiveFilePath, fileName}); // NON-NLS logger.log( Level.INFO, "Available disk space: {0}", new Object[] {freeDiskSpace}); // NON-NLS continue; // skip this file } else { // update est. disk space during this archive, so we don't need to poll for every file // extracted freeDiskSpace = newDiskSpace; } } final String uniqueExtractedName = uniqueArchiveFileName + File.separator + (item.getItemIndex() / 1000) + File.separator + item.getItemIndex() + new File(pathInArchive).getName(); // final String localRelPath = unpackDir + File.separator + localFileRelPath; final String localRelPath = moduleDirRelative + File.separator + uniqueExtractedName; final String localAbsPath = moduleDirAbsolute + File.separator + uniqueExtractedName; // create local dirs and empty files before extracted File localFile = new java.io.File(localAbsPath); // cannot rely on files in top-bottom order if (!localFile.exists()) { try { if (isDir) { localFile.mkdirs(); } else { localFile.getParentFile().mkdirs(); try { localFile.createNewFile(); } catch (IOException ex) { logger.log( Level.SEVERE, "Error creating extracted file: " + localFile.getAbsolutePath(), ex); // NON-NLS } } } catch (SecurityException e) { logger.log( Level.SEVERE, "Error setting up output path for unpacked file: {0}", pathInArchive); // NON-NLS // TODO consider bail out / msg to the user } } // skip the rest of this loop if we couldn't create the file if (localFile.exists() == false) { continue; } final Date createTime = item.getCreationTime(); final Date accessTime = item.getLastAccessTime(); final Date writeTime = item.getLastWriteTime(); final long createtime = createTime == null ? 0L : createTime.getTime() / 1000; final long modtime = writeTime == null ? 0L : writeTime.getTime() / 1000; final long accesstime = accessTime == null ? 0L : accessTime.getTime() / 1000; // record derived data in unode, to be traversed later after unpacking the archive unpackedNode.addDerivedInfo( size, !isDir, 0L, createtime, accesstime, modtime, localRelPath); // unpack locally if a file if (!isDir) { SevenZipExtractor.UnpackStream unpackStream = null; try { unpackStream = new SevenZipExtractor.UnpackStream(localAbsPath); item.extractSlow(unpackStream); } catch (Exception e) { // could be something unexpected with this file, move on logger.log( Level.WARNING, "Could not extract file from archive: " + localAbsPath, e); // NON-NLS } finally { if (unpackStream != null) { unpackStream.close(); } } } // update units for progress bar ++processedItems; } // add them to the DB. We wait until the end so that we have the metadata on all of the // intermediate nodes since the order is not guaranteed try { unpackedTree.addDerivedFilesToCase(); unpackedFiles = unpackedTree.getAllFileObjects(); // check if children are archives, update archive depth tracking for (AbstractFile unpackedFile : unpackedFiles) { if (isSevenZipExtractionSupported(unpackedFile)) { archiveDepthCountTree.addArchive(parentAr, unpackedFile.getId()); } } } catch (TskCoreException e) { logger.log( Level.SEVERE, "Error populating complete derived file hierarchy from the unpacked dir structure"); // NON-NLS // TODO decide if anything to cleanup, for now bailing } } catch (SevenZipException ex) { logger.log(Level.SEVERE, "Error unpacking file: " + archiveFile, ex); // NON-NLS // inbox message // print a message if the file is allocated if (archiveFile.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.ALLOC)) { String msg = NbBundle.getMessage( this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.errUnpacking.msg", archiveFile.getName()); String details = NbBundle.getMessage( this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.errUnpacking.details", archiveFilePath, ex.getMessage()); services.postMessage( IngestMessage.createErrorMessage( EmbeddedFileExtractorModuleFactory.getModuleName(), msg, details)); } } finally { if (inArchive != null) { try { inArchive.close(); } catch (SevenZipException e) { logger.log(Level.SEVERE, "Error closing archive: " + archiveFile, e); // NON-NLS } } if (stream != null) { try { stream.close(); } catch (IOException ex) { logger.log( Level.SEVERE, "Error closing stream after unpacking archive: " + archiveFile, ex); // NON-NLS } } // close progress bar if (progressStarted) { progress.finish(); } } // create artifact and send user message if (hasEncrypted) { String encryptionType = fullEncryption ? ENCRYPTION_FULL : ENCRYPTION_FILE_LEVEL; try { BlackboardArtifact artifact = archiveFile.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED); artifact.addAttribute( new BlackboardAttribute( BlackboardAttribute.ATTRIBUTE_TYPE.TSK_NAME.getTypeID(), EmbeddedFileExtractorModuleFactory.getModuleName(), encryptionType)); services.fireModuleDataEvent( new ModuleDataEvent( EmbeddedFileExtractorModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED)); } catch (TskCoreException ex) { logger.log( Level.SEVERE, "Error creating blackboard artifact for encryption detected for file: " + archiveFilePath, ex); // NON-NLS } String msg = NbBundle.getMessage( this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.encrFileDetected.msg"); String details = NbBundle.getMessage( this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.encrFileDetected.details", archiveFile.getName(), EmbeddedFileExtractorModuleFactory.getModuleName()); services.postMessage( IngestMessage.createWarningMessage( EmbeddedFileExtractorModuleFactory.getModuleName(), msg, details)); } // adding unpacked extracted derived files to the job after closing relevant resources. if (!unpackedFiles.isEmpty()) { // currently sending a single event for all new files services.fireModuleContentEvent(new ModuleContentEvent(archiveFile)); context.addFilesToJob(unpackedFiles); } }