/** * Downloads the given file specified via url to the given canonicalDestination. * * @param urlSource String * @param urlDestination String * @throws Exception */ @Override public void downloadFile(String urlSource, String urlDestination) throws Exception { // sanity check if (urlSource == null || urlSource.length() == 0 || urlDestination == null || urlDestination.length() == 0) { throw new IllegalArgumentException( "downloadFile(): urlSource or urlDestination argument is null..."); } // URLs for given parameters URL source = new URL(urlSource); URL destination = new URL(urlDestination); // we have a compressed file if (GzipUtils.isCompressedFilename(urlSource)) { // downlod to temp destination File tempDestinationFile = org.apache.commons.io.FileUtils.getFile( org.apache.commons.io.FileUtils.getTempDirectory(), new File(source.getFile()).getName()); if (LOG.isInfoEnabled()) { LOG.info("downloadFile(), " + urlSource + ", this may take a while..."); } org.apache.commons.io.FileUtils.copyURLToFile(source, tempDestinationFile); if (LOG.isInfoEnabled()) { LOG.info("downloadFile(), gunzip: we have compressed file, decompressing..."); } // decompress the file gunzip(tempDestinationFile.getCanonicalPath()); if (LOG.isInfoEnabled()) { LOG.info("downloadFile(), gunzip complete..."); } // move temp/decompressed file to final destination File destinationFile = new File(destination.getFile()); if (destinationFile.exists()) { org.apache.commons.io.FileUtils.forceDelete(destinationFile); } org.apache.commons.io.FileUtils.moveFile( org.apache.commons.io.FileUtils.getFile( GzipUtils.getUncompressedFilename(tempDestinationFile.getCanonicalPath())), destinationFile); // lets cleanup after ourselves - remove compressed file tempDestinationFile.delete(); } // uncompressed file, download directry to urlDestination else { if (LOG.isInfoEnabled()) { LOG.info("downloadFile(), " + urlSource + ", this may take a while..."); } org.apache.commons.io.FileUtils.copyURLToFile( source, 
org.apache.commons.io.FileUtils.getFile(destination.getFile())); } }
/** * @param uri The URI of the file to identify * @param request The Identification Request * @throws CommandExecutionException When an exception happens during execution * @throws CommandExecutionException When an exception happens during archive file input/output */ public final void identify(final URI uri, final IdentificationRequest request) throws CommandExecutionException { final String newPath = makeContainerURI("gzip", request.getFileName()); setSlash1(""); final URI newUri = URI.create(GzipUtils.getUncompressedFilename(uri.toString())); final RequestIdentifier identifier = new RequestIdentifier(newUri); final RequestMetaData metaData = new RequestMetaData(SIZE, TIME, uri.getPath()); final GZipIdentificationRequest gzRequest = new GZipIdentificationRequest(metaData, identifier, getTmpDir()); GzipCompressorInputStream gzin = null; try { gzin = new GzipCompressorInputStream(new FileInputStream(request.getSourceFile()), true); expandContainer(gzRequest, gzin, newPath); } catch (IOException ioe) { System.err.println(ioe + " (" + newPath + ")"); // continue after corrupt archive } finally { if (gzin != null) { try { gzin.close(); } catch (IOException ioe) { throw new CommandExecutionException(ioe.getMessage(), ioe); } } } }
/** * Returns the contents of the datafile as specified by ImportDataRecord in an DataMatrix. May * return null if there is a problem reading the file. * * @param importDataRecord ImportDataRecord * @return DataMatrix * @throws Exception */ @Override public DataMatrix getFileContents(ImportDataRecord importDataRecord) throws Exception { if (LOG.isInfoEnabled()) { LOG.info("getFileContents(): " + importDataRecord); } // determine path to file (does override file exist?) String fileCanonicalPath = importDataRecord.getCanonicalPathToData(); // get filedata inputstream InputStream fileContents; // data can be compressed if (GzipUtils.isCompressedFilename(fileCanonicalPath.toLowerCase())) { if (LOG.isInfoEnabled()) { LOG.info("getFileContents(): processing file: " + fileCanonicalPath); } fileContents = readContent( importDataRecord, org.apache.commons.io.FileUtils.openInputStream(new File(fileCanonicalPath))); } else { if (LOG.isInfoEnabled()) { LOG.info("getFileContents(): processing file: " + fileCanonicalPath); } fileContents = org.apache.commons.io.FileUtils.openInputStream(new File(fileCanonicalPath)); } // outta here return getDataMatrix(fileContents); }
/**
 * Chooses the compression mode implied by a file name: GZIP when {@code GzipUtils} recognises
 * the name, XZ when {@code XZUtils} does, and NONE otherwise. The gzip check takes precedence.
 *
 * @param filename name of the file to classify
 * @return GZIP, XZ or NONE
 */
@Override
public CompressionMode getEffectiveCompressionMode(String filename) {
  if (GzipUtils.isCompressedFilename(filename)) {
    return GZIP;
  }
  return XZUtils.isCompressedFilename(filename) ? XZ : NONE;
}
/**
 * Loads a Word2Vec model from a binary model file, transparently gunzipping it when the file
 * name is recognised by {@code GzipUtils.isCompressedFilename}.
 *
 * <p>The stream is read as: word count, vector size (both parsed from strings returned by
 * {@code readString}), then for each word its text followed by {@code size} floats read with
 * {@code readFloat}. Each vector is stored as a unit vector in row {@code i} of the weight
 * matrix and the word is registered in the vocabulary cache.
 *
 * @param modelFile the binary model file, optionally gzip-compressed
 * @return a Word2Vec instance backed by the loaded vocabulary and lookup table
 * @throws NumberFormatException if the word count or vector size header is not an integer
 * @throws IOException if the file cannot be opened or read
 */
private static Word2Vec readBinaryModel(File modelFile)
    throws NumberFormatException, IOException {
  InMemoryLookupTable lookupTable;
  VocabCache cache;
  INDArray syn0;
  int words;
  int size;
  // The FileInputStream is its own resource so it is closed even when the GZIPInputStream
  // constructor fails while reading the gzip header.
  try (FileInputStream fis = new FileInputStream(modelFile);
      BufferedInputStream bis =
          new BufferedInputStream(
              GzipUtils.isCompressedFilename(modelFile.getName())
                  ? new GZIPInputStream(fis)
                  : fis);
      DataInputStream dis = new DataInputStream(bis)) {
    words = Integer.parseInt(readString(dis));
    size = Integer.parseInt(readString(dis));
    syn0 = Nd4j.create(words, size);
    cache = new InMemoryLookupCache(false);
    lookupTable =
        (InMemoryLookupTable)
            new InMemoryLookupTable.Builder().cache(cache).vectorLength(size).build();

    String word;
    for (int i = 0; i < words; i++) {
      word = readString(dis);
      // avoid building a message per vocabulary entry unless tracing is on
      if (log.isTraceEnabled()) {
        log.trace("Loading " + word + " with word " + i);
      }
      // NOTE(review): an empty word is skipped without consuming any vector values —
      // confirm against readString that this cannot desynchronise the stream.
      if (word.isEmpty()) {
        continue;
      }

      float[] vector = new float[size];
      for (int j = 0; j < size; j++) {
        vector[j] = readFloat(dis);
      }

      syn0.putRow(i, Transforms.unitVec(Nd4j.create(vector)));

      cache.addWordToIndex(cache.numWords(), word);
      cache.addToken(new VocabWord(1, word));
      cache.putVocabWord(word);
    }
  }

  Word2Vec ret = new Word2Vec();
  lookupTable.setSyn0(syn0);
  ret.setVocab(cache);
  ret.setLookupTable(lookupTable);
  return ret;
}
/** * Helper function to gunzip file. gzipFile param is canonical path. * * @param gzipFile String */ private static void gunzip(String gzipFile) throws Exception { // setup our gzip inputs tream FileOutputStream fos = null; String outFilePath = GzipUtils.getUncompressedFilename(gzipFile); GZIPInputStream gis = new GZIPInputStream(new FileInputStream(gzipFile)); try { // unzip into file less the .gz fos = new FileOutputStream(outFilePath); IOUtils.copy(gis, fos); } finally { // close up our streams IOUtils.closeQuietly(gis); if (fos != null) IOUtils.closeQuietly(fos); } }