public boolean accept(Path file) { try { FileSystem fs = file.getFileSystem(conf); boolean unpack = conf.getBoolean(unpackParamName, true); if (defaultIgnores.accept(file) && fs.getFileStatus(file).isDir() == false) { String URI = file.toUri().toString(); // detect whether a file is likely to be an archive // TODO extend to other known types if (unpack && URI.toLowerCase().endsWith(".zip")) { FSDataInputStream fis = null; try { fis = fs.open(file); ArchiveInputStream input = new ArchiveStreamFactory().createArchiveInputStream(new BufferedInputStream(fis)); ArchiveEntry entry = null; while ((entry = input.getNextEntry()) != null) { String name = entry.getName(); long size = entry.getSize(); byte[] content = new byte[(int) size]; input.read(content); key.set(name); // fill the values for the content object value.setUrl(name); value.setContent(content); writer.append(key, value); counter++; if (reporter != null) { reporter.incrCounter(Counters.DOC_COUNT, 1); } } } catch (ArchiveException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { fis.close(); } } else { // Hmm, kind of dangerous to do this byte[] fileBArray = new byte[(int) fs.getFileStatus(file).getLen()]; FSDataInputStream fis = null; try { fis = fs.open(file); fis.readFully(0, fileBArray); fis.close(); key.set(URI); // fill the values for the content object value.setUrl(URI); value.setContent(fileBArray); writer.append(key, value); counter++; if (reporter != null) { reporter.incrCounter(Counters.DOC_COUNT, 1); } } catch (FileNotFoundException e) { throw new RuntimeException(e); } catch (IOException e) { throw new RuntimeException(e); } } } // if it is a directory, accept it so we can possibly recurse on // it, // otherwise we don't care about actually accepting the file, // since // all the work is done in the accept method here. return fs.getFileStatus(file).isDir(); } catch (IOException e) { log.error("Exception", e); } return false; }
private void handleTARArchive( ArchiveStoreContext ctx, FreenetURI key, InputStream data, String element, ArchiveExtractCallback callback, MutableBoolean gotElement, boolean throwAtExit, ClientContext context) throws ArchiveFailureException, ArchiveRestartException { if (logMINOR) Logger.minor(this, "Handling a TAR Archive"); TarArchiveInputStream tarIS = null; try { tarIS = new TarArchiveInputStream(data); // MINOR: Assumes the first entry in the tarball is a directory. ArchiveEntry entry; byte[] buf = new byte[32768]; HashSet<String> names = new HashSet<String>(); boolean gotMetadata = false; outerTAR: while (true) { try { entry = tarIS.getNextEntry(); } catch (IllegalArgumentException e) { // Annoyingly, it can throw this on some corruptions... throw new ArchiveFailureException("Error reading archive: " + e.getMessage(), e); } if (entry == null) break; if (entry.isDirectory()) continue; String name = stripLeadingSlashes(entry.getName()); if (names.contains(name)) { Logger.error(this, "Duplicate key " + name + " in archive " + key); continue; } long size = entry.getSize(); if (name.equals(".metadata")) gotMetadata = true; if (size > maxArchivedFileSize && !name.equals(element)) { addErrorElement( ctx, key, name, "File too big: " + size + " greater than current archived file size limit " + maxArchivedFileSize, true); } else { // Read the element long realLen = 0; Bucket output = tempBucketFactory.makeBucket(size); OutputStream out = output.getOutputStream(); try { int readBytes; while ((readBytes = tarIS.read(buf)) > 0) { out.write(buf, 0, readBytes); readBytes += realLen; if (readBytes > maxArchivedFileSize) { addErrorElement( ctx, key, name, "File too big: " + maxArchivedFileSize + " greater than current archived file size limit " + maxArchivedFileSize, true); out.close(); out = null; output.free(); continue outerTAR; } } } finally { if (out != null) out.close(); } if (size <= maxArchivedFileSize) { addStoreElement(ctx, key, name, output, gotElement, element, callback, context); names.add(name); trimStoredData(); } else { // We are here because they asked for this file. callback.gotBucket(output, context); gotElement.value = true; addErrorElement( ctx, key, name, "File too big: " + size + " greater than current archived file size limit " + maxArchivedFileSize, true); } } } // If no metadata, generate some if (!gotMetadata) { generateMetadata(ctx, key, names, gotElement, element, callback, context); trimStoredData(); } if (throwAtExit) throw new ArchiveRestartException("Archive changed on re-fetch"); if ((!gotElement.value) && element != null) callback.notInArchive(context); } catch (IOException e) { throw new ArchiveFailureException("Error reading archive: " + e.getMessage(), e); } finally { Closer.close(tarIS); } }
public static int unpackFileToFolder( @Nonnull final Log logger, @Nullable final String folder, @Nonnull final File archiveFile, @Nonnull final File destinationFolder, final boolean makeAllExecutable) throws IOException { final String normalizedName = archiveFile.getName().toLowerCase(Locale.ENGLISH); final ArchEntryGetter entryGetter; boolean modeZipFile = false; final ZipFile theZipFile; final ArchiveInputStream archInputStream; if (normalizedName.endsWith(".zip")) { logger.debug("Detected ZIP archive"); modeZipFile = true; theZipFile = new ZipFile(archiveFile); archInputStream = null; entryGetter = new ArchEntryGetter() { private final Enumeration<ZipArchiveEntry> iterator = theZipFile.getEntries(); @Override @Nullable public ArchiveEntry getNextEntry() throws IOException { ArchiveEntry result = null; if (this.iterator.hasMoreElements()) { result = this.iterator.nextElement(); } return result; } }; } else { theZipFile = null; final InputStream in = new BufferedInputStream(new FileInputStream(archiveFile)); try { if (normalizedName.endsWith(".tar.gz")) { logger.debug("Detected TAR.GZ archive"); archInputStream = new TarArchiveInputStream(new GZIPInputStream(in)); entryGetter = new ArchEntryGetter() { @Override @Nullable public ArchiveEntry getNextEntry() throws IOException { return ((TarArchiveInputStream) archInputStream).getNextTarEntry(); } }; } else { logger.debug("Detected OTHER archive"); archInputStream = ARCHIVE_STREAM_FACTORY.createArchiveInputStream(in); logger.debug("Created archive stream : " + archInputStream.getClass().getName()); entryGetter = new ArchEntryGetter() { @Override @Nullable public ArchiveEntry getNextEntry() throws IOException { return archInputStream.getNextEntry(); } }; } } catch (ArchiveException ex) { IOUtils.closeQuietly(in); throw new IOException("Can't recognize or read archive file : " + archiveFile, ex); } catch (CantReadArchiveEntryException ex) { IOUtils.closeQuietly(in); throw new IOException("Can't read entry from archive file : " + archiveFile, ex); } } try { final String normalizedFolder = folder == null ? null : FilenameUtils.normalize(folder, true) + '/'; int unpackedFilesCounter = 0; while (true) { final ArchiveEntry entry = entryGetter.getNextEntry(); if (entry == null) { break; } final String normalizedPath = FilenameUtils.normalize(entry.getName(), true); logger.debug("Detected archive entry : " + normalizedPath); if (normalizedFolder == null || normalizedPath.startsWith(normalizedFolder)) { final File targetFile = new File( destinationFolder, normalizedFolder == null ? normalizedPath : normalizedPath.substring(normalizedFolder.length())); if (entry.isDirectory()) { logger.debug("Folder : " + normalizedPath); if (!targetFile.exists() && !targetFile.mkdirs()) { throw new IOException("Can't create folder " + targetFile); } } else { final File parent = targetFile.getParentFile(); if (parent != null && !parent.isDirectory() && !parent.mkdirs()) { throw new IOException("Can't create folder : " + parent); } final FileOutputStream fos = new FileOutputStream(targetFile); try { if (modeZipFile) { logger.debug("Unpacking ZIP entry : " + normalizedPath); final InputStream zipEntryInStream = theZipFile.getInputStream((ZipArchiveEntry) entry); try { if (IOUtils.copy(zipEntryInStream, fos) != entry.getSize()) { throw new IOException( "Can't unpack file, illegal unpacked length : " + entry.getName()); } } finally { IOUtils.closeQuietly(zipEntryInStream); } } else { logger.debug("Unpacking archive entry : " + normalizedPath); if (!archInputStream.canReadEntryData(entry)) { throw new IOException("Can't read archive entry data : " + normalizedPath); } if (IOUtils.copy(archInputStream, fos) != entry.getSize()) { throw new IOException( "Can't unpack file, illegal unpacked length : " + entry.getName()); } } } finally { fos.close(); } if (makeAllExecutable) { try { targetFile.setExecutable(true, true); } catch (SecurityException ex) { throw new IOException("Can't make file executable : " + targetFile, ex); } } unpackedFilesCounter++; } } else { logger.debug("Archive entry " + normalizedPath + " ignored"); } } return unpackedFilesCounter; } finally { IOUtils.closeQuietly(theZipFile); IOUtils.closeQuietly(archInputStream); } }