private FSDataOutputStream create(Path f, Reporter reporter, FileStatus srcstat)
    throws IOException {
  // Remove any existing destination file before re-creating it.
  if (destFileSys.exists(f)) {
    destFileSys.delete(f, false);
  }
  if (!preserve_status) {
    return destFileSys.create(f, true, sizeBuf, reporter);
  }

  // Preserve the requested source attributes; fall back to the destination
  // filesystem defaults for anything that is not preserved.
  FsPermission permission = preseved.contains(FileAttribute.PERMISSION)
      ? srcstat.getPermission() : null;
  short replication = preseved.contains(FileAttribute.REPLICATION)
      ? srcstat.getReplication() : destFileSys.getDefaultReplication();
  long blockSize = preseved.contains(FileAttribute.BLOCK_SIZE)
      ? srcstat.getBlockSize() : destFileSys.getDefaultBlockSize();
  return destFileSys.create(f, permission, true, sizeBuf, replication, blockSize, reporter);
}
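// Hypothetical usage sketch, not part of the source: how a copy routine might pair the
// create(...) helper above with an input stream. The srcFileSys field and the copyOne
// method name are assumptions; sizeBuf is the buffer size field used in create(...) above.
private void copyOne(FileStatus srcstat, Path dst, Reporter reporter) throws IOException {
  FSDataInputStream in = null;
  FSDataOutputStream out = null;
  try {
    in = srcFileSys.open(srcstat.getPath());
    // Destination is created with the preserved permission/replication/block size.
    out = create(dst, reporter, srcstat);
    IOUtils.copyBytes(in, out, sizeBuf, false);
  } finally {
    IOUtils.closeStream(in);
    IOUtils.closeStream(out);
  }
}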
public static RawSequenceFileWriter createWriter(
    FileSystem fs, Configuration conf, Path name,
    Class keyClass, Class valClass,
    CompressionType compressionType) throws IOException {
  return new RawSequenceFileWriter(
      fs, conf, name, keyClass, valClass,
      fs.getConf().getInt("io.file.buffer.size", 65536),
      fs.getDefaultReplication(),
      fs.getDefaultBlockSize(),
      null,
      new Metadata());
}
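// Hypothetical usage sketch, not part of the source. It assumes RawSequenceFileWriter
// follows the SequenceFile.Writer API (append/close); verify against the actual class.
// The key/value classes and file contents are arbitrary illustration values.
public static void writeSample(FileSystem fs, Configuration conf, Path out) throws IOException {
  RawSequenceFileWriter writer =
      createWriter(fs, conf, out, Text.class, BytesWritable.class, CompressionType.NONE);
  try {
    writer.append(new Text("key-0"), new BytesWritable(new byte[] {1, 2, 3}));
  } finally {
    writer.close();
  }
}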
/**
 * Use the input splits to take samples of the input and generate sample keys. By default reads
 * 100,000 keys from 10 locations in the input, sorts them and picks N-1 keys to generate N
 * equally sized partitions.
 *
 * @param job the job to sample
 * @param partFile where to write the partition file
 * @throws IOException if reading the input or writing the partition file fails
 * @throws InterruptedException if sampling is interrupted
 */
public static void writePartitionFile(final JobContext job, Path partFile)
    throws IOException, InterruptedException {
  long t1 = System.currentTimeMillis();
  Configuration conf = job.getConfiguration();
  final TeraEncryptedInputFormat inFormat = new TeraEncryptedInputFormat();
  final TextSampler sampler = new TextSampler();
  int partitions = job.getNumReduceTasks();
  long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
  final List<InputSplit> splits = inFormat.getSplits(job);
  long t2 = System.currentTimeMillis();
  System.out.println("Computing input splits took " + (t2 - t1) + "ms");

  int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
  System.out.println("Sampling " + samples + " splits of " + splits.size());
  final long recordsPerSample = sampleSize / samples;
  final int sampleStep = splits.size() / samples;
  Thread[] samplerReader = new Thread[samples];
  // take N samples from different parts of the input
  for (int i = 0; i < samples; ++i) {
    final int idx = i;
    samplerReader[i] = new Thread("Sampler Reader " + idx) {
      {
        setDaemon(true);
      }

      public void run() {
        long records = 0;
        try {
          TaskAttemptContext context =
              new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
          RecordReader<Text, Text> reader =
              inFormat.createRecordReader(splits.get(sampleStep * idx), context);
          reader.initialize(splits.get(sampleStep * idx), context);
          while (reader.nextKeyValue()) {
            sampler.addKey(new Text(reader.getCurrentKey()));
            records += 1;
            if (recordsPerSample <= records) {
              break;
            }
          }
        } catch (IOException ie) {
          System.err.println(
              "Got an exception while reading splits "
                  + StringUtils.stringifyException(ie));
          System.exit(-1);
        } catch (InterruptedException e) {
          // sampling interrupted; stop this reader
        }
      }
    };
    samplerReader[i].start();
  }

  FileSystem outFs = partFile.getFileSystem(conf);
  DataOutputStream writer =
      outFs.create(partFile, true, 64 * 1024, (short) 10, outFs.getDefaultBlockSize());
  for (int i = 0; i < samples; i++) {
    try {
      samplerReader[i].join();
    } catch (InterruptedException e) {
      // ignore and keep waiting for the remaining sampler threads
    }
  }
  for (Text split : sampler.createPartitions(partitions)) {
    split.write(writer);
  }
  writer.close();
  long t3 = System.currentTimeMillis();
  System.out.println("Computing partitions took " + (t3 - t2) + "ms");
}
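// Hypothetical driver sketch, not part of the source, loosely following the TeraSort
// pattern: write the sampled split points next to the job output and ship them to the
// tasks through the distributed cache. The "_partition.lst" file name and the
// setupPartitioning method are assumptions.
public static void setupPartitioning(Job job, Path outputDir)
    throws IOException, InterruptedException {
  Path partitionFile = new Path(outputDir, "_partition.lst");
  writePartitionFile(job, partitionFile);
  job.addCacheFile(partitionFile.toUri()); // the partitioner reads the split points from here
}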
public NoneCompression(FileSystem fileSystem, JavaSparkContext sparkContext) {
  this.sparkContext = sparkContext;
  this.inputBlockSize = fileSystem.getDefaultBlockSize(new Path("/"));
}
public void testCreate() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, true);
  conf.set(FsPermission.UMASK_LABEL, "000");
  MiniDFSCluster cluster = null;
  FileSystem fs = null;

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    cluster.waitActive();
    fs = FileSystem.get(conf);
    FsPermission rootPerm = checkPermission(fs, "/", null);
    FsPermission inheritPerm =
        FsPermission.createImmutable((short) (rootPerm.toShort() | 0300));

    FsPermission dirPerm = new FsPermission((short) 0777);
    fs.mkdirs(new Path("/a1/a2/a3"), dirPerm);
    checkPermission(fs, "/a1", dirPerm);
    checkPermission(fs, "/a1/a2", dirPerm);
    checkPermission(fs, "/a1/a2/a3", dirPerm);

    dirPerm = new FsPermission((short) 0123);
    FsPermission permission =
        FsPermission.createImmutable((short) (dirPerm.toShort() | 0300));
    fs.mkdirs(new Path("/aa/1/aa/2/aa/3"), dirPerm);
    checkPermission(fs, "/aa/1", permission);
    checkPermission(fs, "/aa/1/aa/2", permission);
    checkPermission(fs, "/aa/1/aa/2/aa/3", dirPerm);

    FsPermission filePerm = new FsPermission((short) 0444);
    FSDataOutputStream out = fs.create(
        new Path("/b1/b2/b3.txt"),
        filePerm,
        true,
        conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096),
        fs.getDefaultReplication(),
        fs.getDefaultBlockSize(),
        null);
    out.write(123);
    out.close();
    checkPermission(fs, "/b1", inheritPerm);
    checkPermission(fs, "/b1/b2", inheritPerm);
    checkPermission(fs, "/b1/b2/b3.txt", filePerm);

    conf.set(FsPermission.UMASK_LABEL, "022");
    permission = FsPermission.createImmutable((short) 0666);
    FileSystem.mkdirs(fs, new Path("/c1"), new FsPermission(permission));
    FileSystem.create(fs, new Path("/c1/c2.txt"), new FsPermission(permission));
    checkPermission(fs, "/c1", permission);
    checkPermission(fs, "/c1/c2.txt", permission);
  } finally {
    try {
      if (fs != null) fs.close();
    } catch (Exception e) {
      LOG.error(StringUtils.stringifyException(e));
    }
    try {
      if (cluster != null) cluster.shutdown();
    } catch (Exception e) {
      LOG.error(StringUtils.stringifyException(e));
    }
  }
}
@Override
public long getDefaultBlockSize(FileSystem fs, Path path) {
  return fs.getDefaultBlockSize(path);
}
/**
 * Log a job-submitted event to history. Creates a new history file for the job. If history file
 * creation fails, history is disabled for all subsequent events.
 *
 * @param jobConfPath path to the job conf XML file in HDFS
 * @param submitTime time when the job tracker received the job
 * @param jobTrackerId identifier of the job tracker that received the job
 * @throws IOException
 */
public void logSubmitted(String jobConfPath, long submitTime, String jobTrackerId)
    throws IOException {
  if (disableHistory) {
    return;
  }

  // create output stream for logging in hadoop.job.history.location
  int defaultBufferSize = logDirFs.getConf().getInt("io.file.buffer.size", 4096);

  try {
    FSDataOutputStream out = null;
    PrintWriter writer = null;

    // In case the old JT is still running, but we can't connect to it, we
    // should ensure that it won't write to our (new JT's) job history file.
    if (logDirFs.exists(logFile)) {
      LOG.info("Remove the old history file " + logFile);
      logDirFs.delete(logFile, true);
    }

    out = logDirFs.create(
        logFile,
        new FsPermission(HISTORY_FILE_PERMISSION),
        true,
        defaultBufferSize,
        logDirFs.getDefaultReplication(),
        jobHistoryBlockSize,
        null);
    writer = new PrintWriter(out);
    fileManager.addWriter(jobId, writer);

    // cache it ...
    fileManager.setHistoryFile(jobId, logFile);

    writers = fileManager.getWriters(jobId);
    if (null != writers) {
      log(
          writers,
          RecordTypes.Meta,
          new Keys[] {Keys.VERSION},
          new String[] {String.valueOf(JobHistory.VERSION)});
    }

    String jobName = getJobName();
    String user = getUserName();

    // add to writer as well
    log(
        writers,
        RecordTypes.Job,
        new Keys[] {
          Keys.JOBID, Keys.JOBNAME, Keys.USER, Keys.SUBMIT_TIME, Keys.JOBCONF, Keys.JOBTRACKERID
        },
        new String[] {
          jobId.toString(), jobName, user, String.valueOf(submitTime), jobConfPath, jobTrackerId
        });
  } catch (IOException e) {
    // Disable history if we have errors other than in the user log.
    disableHistory = true;
  }

  /* Storing the job conf on the log dir */
  Path jobFilePath = new Path(logDir, CoronaJobHistoryFilesManager.getConfFilename(jobId));
  fileManager.setConfFile(jobId, jobFilePath);
  FSDataOutputStream jobFileOut = null;
  try {
    if (!logDirFs.exists(jobFilePath)) {
      jobFileOut = logDirFs.create(
          jobFilePath,
          new FsPermission(HISTORY_FILE_PERMISSION),
          true,
          defaultBufferSize,
          logDirFs.getDefaultReplication(),
          logDirFs.getDefaultBlockSize(),
          null);
      conf.writeXml(jobFileOut);
      jobFileOut.close();
    }
  } catch (IOException ioe) {
    LOG.error("Failed to store job conf in the log dir", ioe);
  } finally {
    if (jobFileOut != null) {
      try {
        jobFileOut.close();
      } catch (IOException ie) {
        LOG.info(
            "Failed to close the job configuration file "
                + StringUtils.stringifyException(ie));
      }
    }
  }
}
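// Hypothetical caller sketch, not part of the source: logging the submission event once the
// job configuration has been uploaded to HDFS. The recordSubmission method name is an
// assumption; only logSubmitted(...) comes from the code above.
void recordSubmission(String jobConfPath, String jobTrackerId) throws IOException {
  long submitTime = System.currentTimeMillis();
  logSubmitted(jobConfPath, submitTime, jobTrackerId);
}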