private void close() throws IOException {
  for (SequenceFile.Writer writer : writers.values()) {
    writer.close();
  }
  writers.clear();
  LOG.info("closed writer");
}
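// Hedged variant (closeAll is an illustrative name, not part of the original class): if one
// writer fails to close, the loop above stops early and leaves the remaining writers open.
// This sketch keeps closing the rest and rethrows the first failure.
private void closeAll() throws IOException {
  IOException first = null;
  for (SequenceFile.Writer writer : writers.values()) {
    try {
      writer.close();
    } catch (IOException e) {
      if (first == null) {
        first = e; // remember the first failure, keep closing the remaining writers
      }
    }
  }
  writers.clear();
  if (first != null) {
    throw first;
  }
}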
private static void createControlFile(
    FileSystem fs,
    int fileSize, // in MB
    int nrFiles)
    throws IOException {
  LOG.info("creating control file: " + fileSize + " mega bytes, " + nrFiles + " files");

  fs.delete(CONTROL_DIR, true);

  for (int i = 0; i < nrFiles; i++) {
    String name = getFileName(i);
    Path controlFile = new Path(CONTROL_DIR, "in_file_" + name);
    SequenceFile.Writer writer = null;
    try {
      writer =
          SequenceFile.createWriter(
              fs, fsConfig, controlFile, Text.class, LongWritable.class, CompressionType.NONE);
      writer.append(new Text(name), new LongWritable(fileSize));
    } catch (Exception e) {
      throw new IOException(e.getLocalizedMessage());
    } finally {
      if (writer != null) writer.close();
      writer = null;
    }
  }
  LOG.info("created control files for: " + nrFiles + " files");
}
public static void createControlFile(FileSystem fs, long megaBytes, int numFiles, long seed)
    throws Exception {
  LOG.info("creating control file: " + megaBytes + " bytes, " + numFiles + " files");

  Path controlFile = new Path(CONTROL_DIR, "files");
  fs.delete(controlFile, true);
  Random random = new Random(seed);

  SequenceFile.Writer writer =
      SequenceFile.createWriter(
          fs, conf, controlFile, Text.class, LongWritable.class, CompressionType.NONE);

  long totalSize = 0;
  long maxSize = ((megaBytes / numFiles) * 2) + 1;
  try {
    while (totalSize < megaBytes) {
      Text name = new Text(Long.toString(random.nextLong()));

      long size = random.nextLong();
      if (size < 0) size = -size;
      size = size % maxSize;

      // LOG.info(" adding: name=" + name + " size=" + size);
      writer.append(name, new LongWritable(size));

      totalSize += size;
    }
  } finally {
    writer.close();
  }
  LOG.info("created control file for: " + totalSize + " bytes");
}
@Override
public boolean writeData(String uri, byte[] data) {
  /*
   * Delete the parent folder if it exists.
   */
  File f = new File(uri);
  if (f.getName().equals(storageConfiguration.getProperty("postfix"))) {
    f = f.getParentFile();
    Path file = new Path(String.valueOf(f));
    try {
      if (fileSystem.exists(file)) {
        fileSystem.delete(file, true);
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
  SequenceFile.Writer writer = getWriterFor(uri);
  HDFSByteChunk byteChunk = new HDFSByteChunk(data, uri);
  try {
    writer.append(new IntWritable(0), byteChunk);
    writer.close();
  } catch (IOException e) {
    e.printStackTrace();
    return false;
  }
  return true;
}
/**
 * Create a data file in SequenceFile format that gets exported to the db.
 *
 * @param fileNum the number of the file (for multi-file export).
 * @param numRecords how many records to write to the file.
 * @param className the table class name to instantiate and populate for each record.
 */
private void createSequenceFile(int fileNum, int numRecords, String className)
    throws IOException {
  try {
    // Instantiate the value record object via reflection.
    Class cls = Class.forName(className, true, Thread.currentThread().getContextClassLoader());
    SqoopRecord record = (SqoopRecord) ReflectionUtils.newInstance(cls, new Configuration());

    // Create the SequenceFile.
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "file:///");
    FileSystem fs = FileSystem.get(conf);
    Path tablePath = getTablePath();
    Path filePath = new Path(tablePath, "part" + fileNum);
    fs.mkdirs(tablePath);
    SequenceFile.Writer w =
        SequenceFile.createWriter(fs, conf, filePath, LongWritable.class, cls);

    // Now write the data.
    int startId = fileNum * numRecords;
    for (int i = 0; i < numRecords; i++) {
      record.parse(getRecordLine(startId + i));
      w.append(new LongWritable(startId + i), record);
    }
    w.close();
  } catch (ClassNotFoundException cnfe) {
    throw new IOException(cnfe);
  } catch (RecordParser.ParseError pe) {
    throw new IOException(pe);
  }
}
@Override
public boolean write(String uri, InputStream stream) {
  SequenceFile.Writer writer = getWriterFor(uri);
  try {
    int size = stream.available();
    byte[] bytes = new byte[size];
    int readBytes = stream.read(bytes);
    if (readBytes != size) {
      log.error(
          "Could not read all the bytes from the inputStream. Read "
              + readBytes
              + " instead of "
              + size);
      return false;
    }
    HDFSByteChunk byteChunk = new HDFSByteChunk(bytes, uri);
    writer.append(new IntWritable(0), byteChunk);
    writer.close();
    return true;
  } catch (IOException e) {
    e.printStackTrace();
    return false;
  }
}
@Override
public boolean write(String uri, List<InputStream> streams) {
  boolean result = false;
  ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
  try {
    for (InputStream stream : streams) {
      int size = stream.available();
      byte[] bytes = new byte[size];
      int readBytes = stream.read(bytes);
      if (readBytes != size) {
        log.error(
            "Could not read all the bytes from the inputStream. Read "
                + readBytes
                + " instead of "
                + size);
        return false;
      }
      outputStream.write(bytes);
    }
    HDFSByteChunk byteChunk = new HDFSByteChunk(outputStream.toByteArray(), uri);
    SequenceFile.Writer writer = getWriterFor(uri);
    writer.append(new IntWritable(0), byteChunk);
    writer.close();
    result = true; // report success only after the chunk is written and the writer is closed
  } catch (IOException e) {
    e.printStackTrace();
  }
  return result;
}
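// Hedged helper sketch (readFully is an illustrative name, not part of the original class):
// both write variants above size their buffer with InputStream.available(), which only reports
// the bytes readable without blocking and can under-report for network or compressed streams.
// Draining the stream to exhaustion avoids that failure mode.
private static byte[] readFully(InputStream stream) throws IOException {
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  byte[] chunk = new byte[8192];
  int read;
  while ((read = stream.read(chunk)) != -1) {
    buffer.write(chunk, 0, read);
  }
  return buffer.toByteArray();
}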
@SuppressWarnings("deprecation")
private void createControlFile(
    FileSystem fs,
    long nrBytes, // in bytes
    int nrFiles)
    throws IOException {
  LOG.info("creating control file: " + nrBytes + " bytes, " + nrFiles + " files");

  Path controlDir = getControlDir(config);
  fs.delete(controlDir, true);

  for (int i = 0; i < nrFiles; i++) {
    String name = getFileName(i);
    Path controlFile = new Path(controlDir, "in_file_" + name);
    SequenceFile.Writer writer = null;
    try {
      writer =
          SequenceFile.createWriter(
              fs, config, controlFile, Text.class, LongWritable.class, CompressionType.NONE);
      writer.append(new Text(name), new LongWritable(nrBytes));
    } catch (Exception e) {
      throw new IOException(e.getLocalizedMessage());
    } finally {
      if (writer != null) writer.close();
      writer = null;
    }
  }
  LOG.info("created control files for: " + nrFiles + " files");
}
private void writeSeqenceFileTest(
    FileSystem fs, int rowCount, Path file, int columnNum, CompressionCodec codec)
    throws IOException {

  byte[][] columnRandom;

  resetRandomGenerators();

  BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
  columnRandom = new byte[columnNum][];
  for (int i = 0; i < columnNum; i++) {
    BytesRefWritable cu = new BytesRefWritable();
    bytes.set(i, cu);
  }

  // zero length key is not allowed by block compress writer, so we use a byte
  // writable
  ByteWritable key = new ByteWritable();
  SequenceFile.Writer seqWriter =
      SequenceFile.createWriter(
          fs,
          conf,
          file,
          ByteWritable.class,
          BytesRefArrayWritable.class,
          CompressionType.BLOCK,
          codec);

  for (int i = 0; i < rowCount; i++) {
    nextRandomRow(columnRandom, bytes);
    seqWriter.append(key, bytes);
  }
  seqWriter.close();
}
/**
 * Write out a SequenceFile, readable by TotalOrderPartitioner, that contains the split points
 * in startKeys.
 *
 * <p>This method was copied from HFileOutputFormat in hbase-0.90.1-cdh3u0. I had to copy it
 * because it's private.
 *
 * @param conf The job configuration.
 * @param partitionsPath output path for SequenceFile.
 * @param startKeys the region start keys to use as the partitions.
 * @throws IOException If there is an error.
 */
private static void writePartitionFile(
    Configuration conf, Path partitionsPath, List<HFileKeyValue> startKeys) throws IOException {
  if (startKeys.isEmpty()) {
    throw new IllegalArgumentException("No regions passed");
  }

  // We're generating a list of split points, and we don't ever
  // have keys < the first region (which has an empty start key)
  // so we need to remove it. Otherwise we would end up with an
  // empty reducer with index 0.
  TreeSet<HFileKeyValue> sorted = new TreeSet<HFileKeyValue>();
  sorted.addAll(startKeys);

  HFileKeyValue first = sorted.first();
  if (0 != first.getRowKey().length) {
    throw new IllegalArgumentException(
        "First region of table should have empty start row key. Instead has: "
            + Bytes.toStringBinary(first.getRowKey()));
  }
  sorted.remove(first);

  // Write the actual file
  final SequenceFile.Writer writer =
      KijiMRPlatformBridge.get()
          .newSeqFileWriter(conf, partitionsPath, HFileKeyValue.class, NullWritable.class);

  try {
    for (HFileKeyValue startKey : sorted) {
      writer.append(startKey, NullWritable.get());
    }
  } finally {
    writer.close();
  }
}
/** Creates the input file (containing the names of the files to be fixed). */
private List<String> createInputFile(
    String jobName, Path inDir, Map<String, Integer> corruptFilePriority, int priority)
    throws IOException {
  Path file = new Path(inDir, jobName + IN_FILE_SUFFIX);
  FileSystem fs = file.getFileSystem(getConf());
  SequenceFile.Writer fileOut =
      SequenceFile.createWriter(fs, getConf(), file, LongWritable.class, Text.class);
  long index = 0L;

  List<String> filesAdded = new ArrayList<String>();
  int count = 0;
  final long max = filesPerTask * BLOCKFIX_TASKS_PER_JOB;
  for (Map.Entry<String, Integer> entry : corruptFilePriority.entrySet()) {
    if (entry.getValue() != priority) {
      continue;
    }
    if (count >= max) {
      break;
    }
    String corruptFileName = entry.getKey();
    fileOut.append(new LongWritable(index++), new Text(corruptFileName));
    filesAdded.add(corruptFileName);
    count++;

    if (index % filesPerTask == 0) {
      fileOut.sync(); // create sync point to make sure we can split here
    }
  }

  fileOut.close();
  return filesAdded;
}
/**
 * Write a partition file for the given job, using the Sampler provided. Queries the sampler for
 * a sample keyset, sorts by the output key comparator, selects the keys for each rank, and
 * writes to the destination returned from {@link TotalOrderPartitioner#getPartitionFile}.
 */
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = job.getConfiguration();
  final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
  int numPartitions = job.getNumReduceTasks();
  K[] samples = sampler.getSample(inf, job);
  RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
  Arrays.sort(samples, comparator);
  Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
  FileSystem fs = dst.getFileSystem(conf);
  if (fs.exists(dst)) {
    fs.delete(dst, false);
  }
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class);
  NullWritable nullValue = NullWritable.get();
  float stepSize = samples.length / (float) numPartitions;
  int last = -1;
  for (int i = 1; i < numPartitions; ++i) {
    int k = Math.round(stepSize * i);
    while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
      ++k;
    }
    writer.append(samples[k], nullValue);
    last = k;
  }
  writer.close();
}
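// Hedged usage sketch (not from the original sources): the method above mirrors
// org.apache.hadoop.mapreduce.lib.partition.InputSampler.writePartitionFile. A job driver would
// typically wire it up with TotalOrderPartitioner like this; the key/value types, partition-file
// path, and sampling parameters below are illustrative assumptions.
private static void configureTotalOrder(Job job) throws Exception {
  // Sample at most 10,000 keys from up to 10 splits, keeping each record with probability 0.01.
  InputSampler.Sampler<Text, Text> sampler =
      new InputSampler.RandomSampler<Text, Text>(0.01, 10000, 10);
  TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/tmp/_partitions"));
  InputSampler.writePartitionFile(job, sampler);
  job.setPartitionerClass(TotalOrderPartitioner.class);
}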
public static void writeCanopyCenters(Configuration conf, ArrayList<StockVector> canopyCenters)
    throws IOException {
  FileSystem fs = FileSystem.get(conf);
  IntWritable intKey = new IntWritable(1);
  Path canopyFileName = new Path(Nasdaq.CANOPY_SEQ_FILE_PATH);
  System.out.println("before seq file");
  // create file
  @SuppressWarnings("deprecation")
  final SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, canopyFileName, StockVector.class, IntWritable.class);
  System.out.println("after seq file");
  System.out.println("canopies" + canopyCenters.size());
  for (StockVector canopyCenter : canopyCenters) {
    // write canopy to file
    writer.append(canopyCenter, intKey);
    System.out.println("sum " + canopyCenter.GetSum());
  }
  System.out.println("canopies end" + canopyCenters.size());
  // close writer and file system
  writer.close();
  // fs.close();
}
/**
 * Set up the input file which has the list of input files.
 *
 * @return boolean
 * @throws IOException
 */
private boolean setup() throws IOException {
  estimateSavings();

  final String randomId = getRandomId();
  JobClient jClient = new JobClient(jobconf);
  Path jobdir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);

  LOG.info(JOB_DIR_LABEL + "=" + jobdir);
  jobconf.set(JOB_DIR_LABEL, jobdir.toString());
  Path log = new Path(jobdir, "_logs");

  // The control file should have small size blocks. This helps
  // in spreading out the load from mappers that will be spawned.
  jobconf.setInt("dfs.blocks.size", OP_LIST_BLOCK_SIZE);

  FileOutputFormat.setOutputPath(jobconf, log);
  LOG.info("log=" + log);

  // create operation list
  FileSystem fs = jobdir.getFileSystem(jobconf);
  Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
  jobconf.set(OP_LIST_LABEL, opList.toString());
  int opCount = 0, synCount = 0;
  SequenceFile.Writer opWriter = null;
  try {
    opWriter =
        SequenceFile.createWriter(
            fs,
            jobconf,
            opList,
            Text.class,
            PolicyInfo.class,
            SequenceFile.CompressionType.NONE);
    for (RaidPolicyPathPair p : raidPolicyPathPairList) {
      // If a large set of files are Raided for the first time, files
      // in the same directory that tend to have the same size will end up
      // with the same map. This shuffle mixes things up, allowing a better
      // mix of files.
      java.util.Collections.shuffle(p.srcPaths);
      for (FileStatus st : p.srcPaths) {
        opWriter.append(new Text(st.getPath().toString()), p.policy);
        opCount++;
        if (++synCount > SYNC_FILE_MAX) {
          opWriter.sync();
          synCount = 0;
        }
      }
    }
  } finally {
    if (opWriter != null) {
      opWriter.close();
    }
    fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file
  }
  raidPolicyPathPairList.clear();

  jobconf.setInt(OP_COUNT_LABEL, opCount);
  LOG.info("Number of files=" + opCount);
  jobconf.setNumMapTasks(
      getMapCount(opCount, new JobClient(jobconf).getClusterStatus().getTaskTrackers()));
  LOG.info("jobName= " + jobName + " numMapTasks=" + jobconf.getNumMapTasks());
  return opCount != 0;
}
@SuppressWarnings("deprecation")
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  super.cleanup(context);
  List<Cluster> newKMeansClusters = new ArrayList<Cluster>();
  List<Cluster> newCanopyClusters = new ArrayList<Cluster>();

  for (Cluster kMeansCluster : _clusters.keySet()) {
    Cluster canopyCluster = _kMeansToCanopyMap.get(kMeansCluster);

    // Set a new cluster center
    Vector center = new Vector();
    center.setElements(new double[kMeansCluster.getCenterVector().getElements().length]);
    List<Vector> vectors = new ArrayList<Vector>();

    for (Vector currentVector : _clusters.get(kMeansCluster)) {
      vectors.add(new Vector(currentVector));

      // Sum the vectors into a new vector in order to find the one that is closest to all
      // others; it will be our new cluster center.
      for (int i = 0; i < currentVector.getElements().length; i++)
        center.getElements()[i] += currentVector.getElements()[i];
    }

    // Divide the vector's elements in order to find its real location (it will be a synthetic
    // vector).
    for (int i = 0; i < center.getElements().length; i++)
      center.getElements()[i] = center.getElements()[i] / vectors.size();

    Cluster newKMeansCluster = new Cluster(center);
    canopyCluster.setIsCovered(newKMeansCluster.isConvergedWithOtherCluster(kMeansCluster));
    newKMeansClusters.add(newKMeansCluster);
    newCanopyClusters.add(canopyCluster);

    // Add the vectors to the new cluster center
    for (Vector vector : vectors) {
      context.write(newKMeansCluster, vector);
    }
  }

  Configuration conf = context.getConfiguration();
  Path outPath = new Path(conf.get("centers.path"));

  FileSystem fs = FileSystem.get(conf);

  if (fs.exists(outPath)) fs.delete(outPath, true);

  SequenceFile.Writer writer =
      SequenceFile.createWriter(
          fs, context.getConfiguration(), outPath, Cluster.class, Cluster.class);
  context.getCounter(Counter.CONVERGED).setValue(0);

  for (int i = 0; i < newKMeansClusters.size(); i++) {
    writer.append(newCanopyClusters.get(i), newKMeansClusters.get(i));

    if (newCanopyClusters.get(i).getIsCovered())
      context.getCounter(Counter.CONVERGED).increment(1);
  }

  writer.close();
}
private static <T extends WritableComparable> Path writePartitionFile(
    String testname, JobConf conf, T[] splits) throws IOException {
  final FileSystem fs = FileSystem.getLocal(conf);
  final Path testdir =
      new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
  Path p = new Path(testdir, testname + "/_partition.lst");
  TotalOrderPartitioner.setPartitionFile(conf, p);
  conf.setNumReduceTasks(splits.length + 1);
  SequenceFile.Writer w = null;
  try {
    NullWritable nw = NullWritable.get();
    w =
        SequenceFile.createWriter(
            fs,
            conf,
            p,
            splits[0].getClass(),
            NullWritable.class,
            SequenceFile.CompressionType.NONE);
    for (int i = 0; i < splits.length; ++i) {
      w.append(splits[i], NullWritable.get());
    }
  } finally {
    if (null != w) w.close();
  }
  return p;
}
private static Path saveVector(Configuration conf, Path path, Vector v) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
  try {
    writer.append(new IntWritable(0), new VectorWritable(v));
  } finally {
    writer.close();
  }
  return path;
}
/** Reduce task done, write output to a file. */
@Override
public void close() throws IOException {
  // write output to a file
  Path outDir = new Path(TMP_DIR, "out");
  Path outFile = new Path(outDir, "reduce-out");
  FileSystem fileSys = FileSystem.get(conf);
  SequenceFile.Writer writer =
      SequenceFile.createWriter(
          fileSys, conf, outFile, LongWritable.class, LongWritable.class, CompressionType.NONE);
  writer.append(new LongWritable(numInside), new LongWritable(numOutside));
  writer.close();
}
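// Hedged read-back sketch (not part of the original reducer): the driver later reads the single
// (numInside, numOutside) record back with SequenceFile.Reader. TMP_DIR, conf, and fileSys are
// assumed to match the writer above.
private static long[] readReduceOutput(FileSystem fileSys, Configuration conf)
    throws IOException {
  Path inFile = new Path(new Path(TMP_DIR, "out"), "reduce-out");
  LongWritable numInside = new LongWritable();
  LongWritable numOutside = new LongWritable();
  SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
  try {
    reader.next(numInside, numOutside);
  } finally {
    reader.close();
  }
  return new long[] {numInside.get(), numOutside.get()};
}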
public void testInputFormat() {
  try {
    JobConf conf = new JobConf();

    String TMP_DIR = System.getProperty("test.build.data", "/tmp");
    Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile");
    SequenceFile.Writer sfw =
        SequenceFile.createWriter(
            FileSystem.getLocal(conf),
            conf,
            filename,
            ChukwaArchiveKey.class,
            ChunkImpl.class,
            SequenceFile.CompressionType.NONE,
            Reporter.NULL);

    StringBuilder buf = new StringBuilder();
    int offsets[] = new int[lines.length];
    for (int i = 0; i < lines.length; ++i) {
      buf.append(lines[i]);
      buf.append("\n");
      offsets[i] = buf.length() - 1;
    }
    ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0);
    ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null);
    val.setRecordOffsets(offsets);
    sfw.append(key, val);
    sfw.append(key, val); // write it twice
    sfw.close();

    long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen();
    InputSplit split = new FileSplit(filename, 0, len, (String[]) null);
    ChukwaInputFormat in = new ChukwaInputFormat();
    RecordReader<LongWritable, Text> r = in.getRecordReader(split, conf, Reporter.NULL);

    LongWritable l = r.createKey();
    Text line = r.createValue();
    for (int i = 0; i < lines.length * 2; ++i) {
      boolean succeeded = r.next(l, line);
      assertTrue(succeeded);
      assertEquals(i, l.get());
      assertEquals(lines[i % lines.length], line.toString());
      System.out.println("read line: " + l.get() + " " + line);
    }
    boolean succeeded = r.next(l, line);
    assertFalse(succeeded);
  } catch (IOException e) {
    e.printStackTrace();
    fail("IO exception " + e);
  }
}
public static void writeClustersToFile(
    FileSystem fs, Configuration conf, int k, List<Vector> points, Path path) throws IOException {
  SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, conf, path, Text.class, Kluster.class);

  for (int i = 0; i < k; i++) {
    Vector vec = points.get(i);
    Kluster cluster = new Kluster(vec, i, new EuclideanDistanceMeasure());
    writer.append(new Text(cluster.getIdentifier()), cluster);
  }
  writer.close();

  // SequenceFileDumper.main(new String[] { "--input", inClusterFile.toString() });
}
/** Reduce task done, write output to a file. */
@Override
public void cleanup(Context context) throws IOException {
  // write output to a file
  Configuration conf = context.getConfiguration();
  Path outDir = new Path(conf.get(FileOutputFormat.OUTDIR));
  Path outFile = new Path(outDir, "reduce-out");
  FileSystem fileSys = FileSystem.get(conf);
  SequenceFile.Writer writer =
      SequenceFile.createWriter(
          fileSys, conf, outFile, LongWritable.class, LongWritable.class, CompressionType.NONE);
  writer.append(new LongWritable(numInside), new LongWritable(numOutside));
  writer.close();
}
private void generateTestData() {
  try {
    SequenceFile.Writer writer =
        SequenceFile.createWriter(fs, conf, new Path(INPUT), LongWritable.class, Text.class);
    for (int i = 0; i < input.length; i++) {
      writer.append(new LongWritable(i), new Text(input[i]));
    }
    writer.close();
  } catch (Exception e) {
    e.printStackTrace();
  }
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  super.cleanup(context);
  Configuration conf = context.getConfiguration();
  Path outPath = new Path(conf.get(CENTERS_CONF_KEY));
  FileSystem fs = FileSystem.get(conf);
  // fs.delete(outPath, true);
  SequenceFile.Writer writer =
      SequenceFile.createWriter(
          fs, context.getConfiguration(), outPath, Centroid.class, IntWritable.class);
  final IntWritable mockValue = new IntWritable(0);
  for (Centroid center : centers) {
    writer.append(center, mockValue);
  }
  writer.close();
}
public static void writePointsToFile(
    FileSystem fs, Configuration conf, List<Vector> points, Path path) throws IOException {
  SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, conf, path, LongWritable.class, VectorWritable.class);

  long recNum = 0;
  VectorWritable vec = new VectorWritable();
  for (Vector point : points) {
    vec.set(point);
    writer.append(new LongWritable(recNum++), vec);
  }
  writer.close();

  // VectorDumper.main(new String[] { "--input", inPointFile.toString() });
  // SequenceFileDumper.main(new String[] { "--input", inPointFile.toString() });
}
@Test
public void testReadString() throws Exception {
  if (SKIP) {
    return;
  }

  // final Path file = new Path("hdfs://localhost:9000/tmp/test/test-hdfs-file");
  final Path file =
      new Path(new File("../../../../target/test/test-camel-string").getAbsolutePath());
  org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
  // Now set the classes for the filesystems. This is normally done via java.util.ServiceLoader,
  // which doesn't work inside OSGi.
  conf.setClass("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class, FileSystem.class);
  conf.setClass(
      "fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class, FileSystem.class);
  SequenceFile.Writer writer =
      SequenceFile.createWriter(
          conf,
          SequenceFile.Writer.file(file),
          SequenceFile.Writer.keyClass(NullWritable.class),
          SequenceFile.Writer.valueClass(Text.class));
  NullWritable keyWritable = NullWritable.get();
  Text valueWritable = new Text();
  String value = "CIAO!";
  valueWritable.set(value);
  writer.append(keyWritable, valueWritable);
  writer.sync();
  writer.close();

  context.addRoutes(
      new RouteBuilder() {
        public void configure() {
          // from("hdfs2://localhost:9000/tmp/test/test-hdfs-file?fileSystemType=HDFS&fileType=SEQUENCE_FILE&initialDelay=0").to("mock:result");
          from("hdfs2:///"
                  + file.toUri()
                  + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
              .to("mock:result");
        }
      });
  context.start();

  MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
  resultEndpoint.expectedMessageCount(1);
  resultEndpoint.assertIsSatisfied();
}
public static void writeVectorsToFile(
    FileSystem fs, Configuration conf, List<Vector> vectors, Path path) throws IOException {
  SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);

  VectorWritable vec = new VectorWritable();
  for (Vector vector : vectors) {
    vec.set(vector);
    if (NamedVector.class.isAssignableFrom(vector.getClass())) {
      writer.append(new Text(((NamedVector) vector).getName()), vec);
    } else {
      writer.append(new Text(vector.toString()), vec);
    }
  }
  writer.close();

  // VectorDumper.main(new String[] { "--input", inPointFile.toString() });
  // SequenceFileDumper.main(new String[] { "--input", inPointFile.toString() });
}
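// Hedged alternative (not from the original sources): the direct constructor
// new SequenceFile.Writer(fs, conf, path, keyClass, valueClass) used by several helpers above is
// deprecated in current Hadoop releases. The options-based factory below is the non-deprecated
// form, in the same style as the Camel test earlier in this section; the method name and the
// Text/VectorWritable key/value classes here are illustrative.
public static SequenceFile.Writer openVectorWriter(Configuration conf, Path path)
    throws IOException {
  return SequenceFile.createWriter(
      conf,
      SequenceFile.Writer.file(path),
      SequenceFile.Writer.keyClass(Text.class),
      SequenceFile.Writer.valueClass(VectorWritable.class),
      SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
}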
/** {@inheritDoc} */
@Override
public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {

  SequenceFile.Writer fileListWriter = null;

  try {
    fileListWriter = getWriter(pathToListingFile);

    for (Path path : options.getSourcePaths()) {
      FileSystem sourceFS = path.getFileSystem(getConf());
      path = makeQualified(path);

      FileStatus rootStatus = sourceFS.getFileStatus(path);
      Path sourcePathRoot = computeSourceRootPath(rootStatus, options);
      boolean localFile = (rootStatus.getClass() != FileStatus.class);

      FileStatus[] sourceFiles = sourceFS.listStatus(path);
      if (sourceFiles != null && sourceFiles.length > 0) {
        for (FileStatus sourceStatus : sourceFiles) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy.");
          }
          writeToFileListing(fileListWriter, sourceStatus, sourcePathRoot, localFile);

          if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
            if (LOG.isDebugEnabled()) {
              LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
            }
            traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot, localFile);
          }
        }
      } else {
        writeToFileListing(fileListWriter, rootStatus, sourcePathRoot, localFile);
      }
    }
  } finally {
    try {
      if (fileListWriter != null) fileListWriter.close();
    } catch (IOException exception) {
      LOG.error("Could not close output-stream to the file-list: ", exception);
      throw exception;
    }
  }
}
public void createBucketWithRandomTS(String s, int EXP, int ANZ) throws IOException {

  DecimalFormat df = new DecimalFormat("0.000");

  System.out.println("--> create bucket : uncorrelated TS alpha=0.5");

  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(config);

  Path path = new Path(outputDir + "/" + s + "_alpha_0.5_.tsb.vec.seq");
  System.out.println("--> create bucket : " + path.toString());

  // write a SequenceFile from a Vector
  SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, config, path, Text.class, VectorWritable.class);

  System.out.println(
      "--> process bucket : Uniform-Random-Generator ( z="
          + ANZ
          + ", l="
          + Math.pow(2, EXP)
          + ", TestA )");

  int SAMPLES = 0;
  for (int i = 0; i < ANZ; i++) {

    TSData data = new TSData();
    data.dataset = processTESTA(data.getRandomData((int) Math.pow(2, EXP)));

    Messreihe mr = data.getMessreihe();
    if (SAMPLES < TSPropertyTester.zSAMPLES) TSPropertyTester.addSample(mr);
    SAMPLES++;

    // Note: here we lose the metadata of each row.
    System.out.print(" (" + i + ")");

    NamedVector nv = new NamedVector(new DenseVector(data.getData()), data.label);
    VectorWritable vec = new VectorWritable();
    vec.set(nv);
    writer.append(new Text(nv.getName()), vec);
  }
  writer.close();
  System.out.println("### DONE : " + path.toString());
}
/**
 * Create LRC time series ...
 *
 * @param s
 * @param z
 * @param EXP
 * @param BETA
 * @throws IOException
 * @throws Exception
 */
public void createBucketWithRandomTS(String s, int z, int EXP, double BETA)
    throws IOException, Exception {

  DecimalFormat df = new DecimalFormat("0.000");

  System.out.println("--> create bucket : LRC with beta=" + df.format(BETA));

  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(config);

  Path path = new Path(outputDir + "/" + s + "_LRC_beta_" + df.format(BETA) + ".tsb.vec.seq");
  System.out.println("--> create bucket : " + path.toString());

  // write a SequenceFile from a Vector
  SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, config, path, Text.class, VectorWritable.class);

  System.out.println("--> process bucket : LRC-Generator (" + z + ")");

  int SAMPLES = 0;
  for (int i = 0; i < z; i++) {

    boolean showTESTS = false;
    Messreihe mr =
        LongTermCorrelationSeriesGenerator.getRandomRow(
            (int) Math.pow(2, EXP), BETA, showTESTS, false);

    if (SAMPLES < TSPropertyTester.zSAMPLES) TSPropertyTester.addSample(mr);
    SAMPLES++;

    TSData data = TSData.convertMessreihe(mr);

    System.out.println("(" + i + ")");

    NamedVector nv = new NamedVector(new DenseVector(data.getData()), data.label);
    VectorWritable vec = new VectorWritable();
    vec.set(nv);
    writer.append(new Text(nv.getName()), vec);
  }
  writer.close();
  System.out.println("### DONE : " + path.toString());
}
/**
 * Selects a complete group from the sourceFolder and converts it into a TS bucket.
 *
 * <p>==> this is only a SAVE function ...
 *
 * @param groupFolder
 */
public void createBucketFromLocalFilesInDirectory(String groupFolder, int limit)
    throws IOException {

  LIMIT = limit;

  String s = groupFolder;
  File f = new File(sourcFolder + s);
  System.out.println("--> load data : " + f.getAbsolutePath());

  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(config);

  Path path = new Path(outputDir + "/" + s + ".tsb.vec.seq");
  System.out.println("--> create bucket : " + path.toString());

  // write a SequenceFile from a Vector
  SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, config, path, Text.class, VectorWritable.class);

  File[] liste = f.listFiles(new AccessFileFilter());
  System.out.println(liste.length);

  System.out.println("--> process bucket : " + f.getAbsolutePath() + " (" + liste.length + ")");

  int c = 0;
  for (File file : liste) {
    c++;
    if (c < LIMIT) {
      TSData data = new TSData(file);

      System.out.println("(" + c + ")");

      NamedVector nv = new NamedVector(new DenseVector(data.getData()), data.label);
      VectorWritable vec = new VectorWritable();
      vec.set(nv);
      writer.append(new Text(nv.getName()), vec);
    }
    if (c % 10000 == 0) {
      System.out.println(c);
    }
  }
  writer.close();
}