/**
 * Set up the output format appropriately.
 *
 * @param job - Job handle
 * @throws IOException if any error occurs
 */
private void configureOutputFormat(Job job) throws IOException {
  final Configuration configuration = job.getConfiguration();
  Path targetPath = inputOptions.getTargetPath();
  targetPath = targetPath.makeQualified(targetPath.getFileSystem(configuration));
  if (inputOptions.shouldAtomicCommit()) {
    Path workDir = inputOptions.getAtomicWorkPath();
    if (workDir == null) {
      workDir = targetPath.getParent();
    }
    workDir = new Path(workDir, WIP_PREFIX + targetPath.getName() + rand.nextInt());
    FileSystem workFS = workDir.getFileSystem(configuration);
    FileSystem targetFS = targetPath.getFileSystem(configuration);
    if (!DistCpUtils.compareFs(targetFS, workFS)) {
      throw new IllegalArgumentException(
          "Work path " + workDir + " and target path " + targetPath
              + " are in different file systems");
    }
    CopyOutputFormat.setWorkingDirectory(job, workDir);
  } else {
    CopyOutputFormat.setWorkingDirectory(job, targetPath);
  }
  CopyOutputFormat.setCommitDirectory(job, targetPath);
  Path counterFilePath = inputOptions.getOutPutDirectory();
  if (counterFilePath == null) {
    LOG.error("Output directory is null for distcp");
  } else {
    LOG.info("DistCp output directory path: " + counterFilePath);
    CopyOutputFormat.setOutputPath(job, counterFilePath);
  }
}
public boolean refresh(final Path path) throws IOException {
  try (FileSystem fs = path.getFileSystem(new Configuration())) {
    if (_fileStatus.isPresent()) {
      Optional<FileStatus> oldStatus = this._fileStatus;
      try {
        Optional<FileStatus> newStatus = Optional.of(fs.getFileStatus(path));
        // Cache the new status so the next refresh compares against it.
        this._fileStatus = newStatus;
        this.exists = newStatus.isPresent();
        return (oldStatus.isPresent() != newStatus.isPresent()
            || oldStatus.get().getModificationTime() != newStatus.get().getModificationTime()
            || oldStatus.get().isDirectory() != newStatus.get().isDirectory()
            || oldStatus.get().getLen() != newStatus.get().getLen());
      } catch (FileNotFoundException e) {
        _fileStatus = Optional.absent();
        this.exists = false;
        return true;
      }
    } else {
      if (fs.exists(path)) {
        _fileStatus = Optional.of(fs.getFileStatus(path));
        this.exists = true;
        return true;
      } else {
        return false;
      }
    }
  }
}
private static void finalize(
    Configuration conf, JobConf jobconf, final Path destPath, String presevedAttributes)
    throws IOException {
  if (presevedAttributes == null) {
    return;
  }
  EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes);
  if (!preseved.contains(FileAttribute.USER)
      && !preseved.contains(FileAttribute.GROUP)
      && !preseved.contains(FileAttribute.PERMISSION)) {
    return;
  }
  FileSystem dstfs = destPath.getFileSystem(conf);
  Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
  SequenceFile.Reader in = null;
  try {
    in = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf), dstdirlist, jobconf);
    Text dsttext = new Text();
    FilePair pair = new FilePair();
    for (; in.next(dsttext, pair); ) {
      Path absdst = new Path(destPath, pair.output);
      updatePermissions(pair.input, dstfs.getFileStatus(absdst), preseved, dstfs);
    }
  } finally {
    checkAndClose(in);
  }
}
public static void runJob(String... args) throws Exception {
  Path smallFilePath = new Path(args[0]);
  Configuration conf = new Configuration();
  FileSystem fs = smallFilePath.getFileSystem(conf);
  FileStatus smallFilePathStatus = fs.getFileStatus(smallFilePath);
  if (smallFilePathStatus.isDir()) {
    for (FileStatus f : fs.listStatus(smallFilePath)) {
      if (f.getPath().getName().contains(args[0])) {
        DistributedCache.addCacheFile(f.getPath().toUri(), conf);
      }
    }
  } else {
    DistributedCache.addCacheFile(smallFilePath.toUri(), conf);
  }
  Path inputPath = new Path(args[1]);
  Path outputPath = new Path(args[2]);
  Job job = new Job(conf);
  job.setJarByClass(Main.class);
  job.setMapperClass(GenericReplicatedJoin.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setNumReduceTasks(0);
  outputPath.getFileSystem(conf).delete(outputPath, true);
  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);
  job.waitForCompletion(true);
}
/**
 * Setup to do for each unit test.
 *
 * @throws IOException if there's an error accessing the local file system
 */
@Before
public void setUp() throws IOException {
  srcMetastore = new MockHiveMetastoreClient();
  destMetastore = new MockHiveMetastoreClient();
  srcLocalTmp.create();
  destLocalTmp.create();
  final Path srcFsRoot = new Path("file://" + srcLocalTmp.getRoot().getAbsolutePath());
  final Path destFsRoot = new Path("file://" + destLocalTmp.getRoot().getAbsolutePath());
  srcWarehouseRoot = new Path(makeFileUri(srcLocalTmp), "warehouse");
  destWarehouseRoot = new Path(makeFileUri(destLocalTmp), "warehouse");
  srcWarehouseRoot.getFileSystem(conf).mkdirs(srcWarehouseRoot);
  destWarehouseRoot.getFileSystem(conf).mkdirs(destWarehouseRoot);
  LOG.info(String.format("src root: %s, dest root: %s", srcWarehouseRoot, destWarehouseRoot));
  final Path srcTmp = new Path(makeFileUri(this.srcLocalTmp), "tmp");
  final Path destTmp = new Path(makeFileUri(this.destLocalTmp), "tmp");
  srcCluster = new MockCluster("src_cluster", srcMetastore, srcFsRoot, srcTmp);
  destCluster = new MockCluster("dest_cluster", destMetastore, destFsRoot, destTmp);
  // Disable checking of modified times as the local filesystem does not support this
  directoryCopier = new DirectoryCopier(conf, destCluster.getTmpDir(), false);
}
/**
 * @param src the source file.
 * @param dest the destination file.
 * @param conf the configuration
 * @return true if the destination already exists with the same length as the source, else false.
 * @throws IOException when any file system related call fails
 */
private boolean checkPreExisting(Path src, Path dest, Configuration conf) throws IOException {
  FileSystem destFS = dest.getFileSystem(conf);
  FileSystem sourceFS = src.getFileSystem(conf);
  if (destFS.exists(dest)) {
    return (sourceFS.getFileStatus(src).getLen() == destFS.getFileStatus(dest).getLen());
  }
  return false;
}
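// Hedged usage sketch (not part of the original class): a caller would typically use
// checkPreExisting to skip redundant copies. The helper name copyIfMissing and its placement are
// assumptions for illustration; FileUtil.copy is the standard Hadoop helper and assumes
// org.apache.hadoop.fs.FileUtil is imported.
private void copyIfMissing(Path src, Path dest, Configuration conf) throws IOException {
  if (!checkPreExisting(src, dest, conf)) {
    // Copy without deleting the source; overwrite behavior is left to FileUtil's defaults.
    FileUtil.copy(src.getFileSystem(conf), src, dest.getFileSystem(conf), dest, false, conf);
  }
}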
@Test
public void testRhget() throws Exception {
  final Path tmpPath = new Path("hdfs://localhost:9000/tmp/junit-test2");
  Assert.assertTrue(tmpPath.getFileSystem(personalServer.getConf()).mkdirs(tmpPath));
  // todo: why not use this instead of what is in rhget?
  tmpPath
      .getFileSystem(personalServer.getConf())
      .copyToLocalFile(true, tmpPath, new Path("/tmp/junit-test2"));
  // personalServer.rhget(tmpPath.toString(),"hdfs://localhost:9000/tmp/copy");
  // personalServer.rhdel(tmpPath.toString());
}
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  GenericOptionsParser gop = new GenericOptionsParser(conf, args);
  conf = gop.getConfiguration();
  Job job = new Job(conf, "ClientUserInstallMR");
  job.setJarByClass(ClientUserInstallMR.class);
  FileInputFormat.addInputPaths(job, conf.get("input_dir"));
  String outputDir = conf.get("output_dir");
  String tmpDir = outputDir + "_tmp";
  Path tmpOutput = new Path(tmpDir);
  FileOutputFormat.setOutputPath(job, tmpOutput);
  tmpOutput.getFileSystem(conf).delete(tmpOutput, true);
  job.setMapperClass(ClientUserInstallFirstMapper.class);
  job.setReducerClass(ClientUserInstallFirstReduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(30);
  int code = job.waitForCompletion(true) ? 0 : 1;
  if (code == 0) {
    Job secondJob = new Job(conf, "ClientUserInstallResult");
    secondJob.setJarByClass(ClientUserInstallMR.class);
    conf.set("stat_date", conf.get("stat_date"));
    FileInputFormat.addInputPath(secondJob, new Path(tmpDir));
    Path output = new Path(outputDir);
    FileOutputFormat.setOutputPath(secondJob, output);
    output.getFileSystem(conf).delete(output, true);
    secondJob.setMapperClass(ClientUserInstallSecondMapper.class);
    secondJob.setReducerClass(ClientUserInstallSecondReduce.class);
    secondJob.setInputFormatClass(KeyValueTextInputFormat.class);
    secondJob.setOutputFormatClass(TextOutputFormat.class);
    secondJob.setOutputKeyClass(Text.class);
    secondJob.setOutputValueClass(Text.class);
    secondJob.setNumReduceTasks(1);
    code = secondJob.waitForCompletion(true) ? 0 : 1;
  }
  FileSystem.get(conf).delete(tmpOutput, true);
  System.exit(code);
  return code;
}
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  GenericOptionsParser gop = new GenericOptionsParser(conf, args);
  conf = gop.getConfiguration();
  Job job = new Job(conf, conf.get("job_name"));
  FileInputFormat.addInputPaths(job, conf.get("input_dir"));
  Path output = new Path(conf.get("output_dir"));
  FileOutputFormat.setOutputPath(job, output);
  output.getFileSystem(conf).delete(output, true);
  job.setJarByClass(BrowerLogFormatMR.class);
  job.setMapperClass(BrowerLogFormatMapper.class);
  job.setReducerClass(BrowerLogFormatReducer.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  int code = job.waitForCompletion(true) ? 0 : 1;
  return code;
}
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext taskAttemptContext)
    throws IOException {
  context = taskAttemptContext;
  FileSplit fileSplit = (FileSplit) genericSplit;
  lzoFile = fileSplit.getPath();
  // The LzoSplitInputFormat is not splittable, so the split length is the whole file.
  totalFileSize = fileSplit.getLength();
  // Jump through some hoops to create the lzo codec.
  Configuration conf = CompatibilityUtil.getConfiguration(context);
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  CompressionCodec codec = factory.getCodec(lzoFile);
  ((Configurable) codec).setConf(conf);
  LzopDecompressor lzopDecompressor = (LzopDecompressor) codec.createDecompressor();
  FileSystem fs = lzoFile.getFileSystem(conf);
  rawInputStream = fs.open(lzoFile);
  // Creating the LzopInputStream here just reads the lzo header for us, nothing more.
  // We do the rest of our input off of the raw stream ourselves.
  codec.createInputStream(rawInputStream, lzopDecompressor);
  // This must be called AFTER createInputStream is called, because createInputStream
  // is what reads the header, which has the checksum information. Otherwise getChecksumsCount
  // erroneously returns zero, and all block offsets will be wrong.
  numCompressedChecksums = lzopDecompressor.getCompressedChecksumsCount();
  numDecompressedChecksums = lzopDecompressor.getDecompressedChecksumsCount();
}
public Path write(Message... messages) throws Exception {
  synchronized (WriteUsingMR.class) {
    outputPath = TestUtils.someTemporaryFilePath();
    Path inputPath = TestUtils.someTemporaryFilePath();
    FileSystem fileSystem = inputPath.getFileSystem(conf);
    fileSystem.create(inputPath);
    inputMessages = Collections.unmodifiableList(Arrays.asList(messages));
    final Job job = new Job(conf, "write");
    // input not really used
    TextInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(WritingMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(ProtoParquetOutputFormat.class);
    ProtoParquetOutputFormat.setOutputPath(job, outputPath);
    ProtoParquetOutputFormat.setProtobufClass(job, TestUtils.inferRecordsClass(messages));
    waitForJob(job);
    inputMessages = null;
    return outputPath;
  }
}
@Before
public void setUp() throws Exception {
  // create local Pig server
  pigServer = UnitTestUtil.makePigServer();
  // create temp SequenceFile
  File tempFile = File.createTempFile("test", ".txt");
  tempFilename = tempFile.getAbsolutePath();
  Path path = new Path("file:///" + tempFilename);
  Configuration conf = new Configuration();
  FileSystem fs = path.getFileSystem(conf);
  IntWritable key = new IntWritable();
  Text value = new Text();
  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
    for (int i = 0; i < DATA.length; ++i) {
      key.set(i);
      value.set(DATA[i]);
      writer.append(key, value);
    }
  } finally {
    IOUtils.closeStream(writer);
  }
}
protected static List<String> getFilesInHivePartition(Partition part, JobConf jobConf) {
  List<String> result = newArrayList();
  String ignoreFileRegex = jobConf.get(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, "");
  Pattern ignoreFilePattern = Pattern.compile(ignoreFileRegex);
  try {
    Path partitionDirPath = new Path(part.getSd().getLocation());
    FileStatus[] partitionContent =
        partitionDirPath.getFileSystem(jobConf).listStatus(partitionDirPath);
    for (FileStatus currStatus : partitionContent) {
      if (!currStatus.isDir()) {
        if (!ignoreFilePattern.matcher(currStatus.getPath().getName()).matches()) {
          result.add(currStatus.getPath().toUri().getPath());
        } else {
          LOG.debug(
              "Ignoring path {} since matches ignore regex {}",
              currStatus.getPath().toUri().getPath(),
              ignoreFileRegex);
        }
      }
    }
  } catch (IOException e) {
    logError("Unable to read the content of partition '" + part.getSd().getLocation() + "'", e);
  }
  return result;
}
/** debugging TODO remove */
private void readOutputFiles(String jobName, Path outDir) throws IOException {
  FileSystem fs = outDir.getFileSystem(getConf());
  if (!fs.getFileStatus(outDir).isDir()) {
    throw new IOException(outDir.toString() + " is not a directory");
  }
  FileStatus[] files = fs.listStatus(outDir);
  for (FileStatus f : files) {
    Path fPath = f.getPath();
    if ((!f.isDir()) && (fPath.getName().startsWith(PART_PREFIX))) {
      LOG.info("opening " + fPath.toString());
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, fPath, getConf());
      Text key = new Text();
      Text value = new Text();
      while (reader.next(key, value)) {
        LOG.info("read " + f.getPath().toString());
        LOG.info("read: k=" + key.toString() + " v=" + value.toString());
      }
      LOG.info("done reading " + fPath.toString());
      reader.close();
    }
  }
}
@Override
protected synchronized void startInternal() throws Exception {
  // create filesystem only now, as part of service-start. By this time, RM is
  // authenticated with kerberos so we are good to create a file-system handle.
  fsConf = new Configuration(getConfig());
  fsConf.setBoolean("dfs.client.retry.policy.enabled", true);
  String retryPolicy =
      fsConf.get(
          YarnConfiguration.FS_RM_STATE_STORE_RETRY_POLICY_SPEC,
          YarnConfiguration.DEFAULT_FS_RM_STATE_STORE_RETRY_POLICY_SPEC);
  fsConf.set("dfs.client.retry.policy.spec", retryPolicy);
  String scheme = fsWorkingPath.toUri().getScheme();
  if (scheme == null) {
    scheme = FileSystem.getDefaultUri(fsConf).getScheme();
  }
  if (scheme != null) {
    String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme);
    fsConf.setBoolean(disableCacheName, true);
  }
  fs = fsWorkingPath.getFileSystem(fsConf);
  mkdirsWithRetries(rmDTSecretManagerRoot);
  mkdirsWithRetries(rmAppRoot);
  mkdirsWithRetries(amrmTokenSecretManagerRoot);
  mkdirsWithRetries(reservationRoot);
}
@Override
public RecordWriter<IEtlKey, CamusWrapper> getDataRecordWriter(
    TaskAttemptContext context, String fileName, CamusWrapper data, FileOutputCommitter committer)
    throws IOException, InterruptedException {
  // If recordDelimiter hasn't been initialized, do so now
  if (recordDelimiter == null) {
    recordDelimiter =
        context.getConfiguration().get(ETL_OUTPUT_RECORD_DELIMITER, DEFAULT_RECORD_DELIMITER);
  }
  // Get the filename for this RecordWriter.
  Path path =
      new Path(
          committer.getWorkPath(),
          EtlMultiOutputFormat.getUniqueFile(context, fileName, getFilenameExtension()));
  // final FSDataOutputStream writer =
  //     path.getFileSystem(context.getConfiguration()).create(path);
  FileSystem fs = path.getFileSystem(context.getConfiguration());
  DataOutputStream writer; // = fs.create(path, false);
  if (isCompressed) {
    return new ByteRecordWriter(
        new DataOutputStream(codec.createOutputStream(fs.create(path, false))), recordDelimiter);
  } else {
    return new ByteRecordWriter(fs.create(path, false), recordDelimiter);
  }
}
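// Hedged sketch (not from the original class): the writer above relies on `codec` and
// `isCompressed` fields that are initialized elsewhere. One common way to derive such fields from
// the standard FileOutputFormat compression settings is shown below. The helper name
// initCompression and its placement are assumptions; the Hadoop calls themselves are standard and
// assume org.apache.hadoop.mapreduce.lib.output.FileOutputFormat,
// org.apache.hadoop.io.compress.{CompressionCodec,GzipCodec} and
// org.apache.hadoop.util.ReflectionUtils are imported.
private void initCompression(TaskAttemptContext context) {
  isCompressed = FileOutputFormat.getCompressOutput(context);
  if (isCompressed) {
    // Fall back to gzip when no compressor class is configured for the job.
    Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(context, GzipCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, context.getConfiguration());
  }
}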
public Writer(Configuration conf, Path dir, SequenceFile.Writer.Option... options)
    throws IOException {
  super(conf, dir, options);
  this.fs = dir.getFileSystem(conf);
  this.dir = dir;
  initBloomFilter(conf);
}
@Override
public int run(String[] arg0) throws Exception {
  Job job = Job.getInstance(getConf(), "PopulationJob");
  Configuration conf = job.getConfiguration();
  job.setJarByClass(Population.class);
  Path out = new Path("totalorder");
  FileInputFormat.setInputPaths(job, "populations");
  FileOutputFormat.setOutputPath(job, out);
  out.getFileSystem(conf).delete(out, true);
  job.setMapperClass(PopulationMapper.class);
  job.setReducerClass(PopulationReducer.class);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(5);
  // Configure the TotalOrderPartitioner here...
  job.setPartitionerClass(TotalOrderPartitioner.class);
  InputSampler.Sampler<Text, Text> sampler =
      new InputSampler.RandomSampler<Text, Text>(0.1, 200, 3);
  InputSampler.writePartitionFile(job, sampler);
  String partitionFile = TotalOrderPartitioner.getPartitionFile(conf);
  URI partitionURI = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
  job.addCacheFile(partitionURI);
  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Configures the partitioner for generating HFiles.
 *
 * <p>Each generated HFile should fit within a region of the target table. Additionally, it's
 * optimal to have only one HFile to load into each region, since a read from that region will
 * require reading from each HFile under management (until compaction happens and merges them all
 * back into one HFile).
 *
 * <p>To achieve this, we configure a TotalOrderPartitioner that will partition the records output
 * from the Mapper based on their rank in a total ordering of the keys. The <code>startKeys</code>
 * argument should contain a list of the first key in each of those partitions.
 *
 * @param job The job to configure.
 * @param startKeys A list of keys that will mark the boundaries between the partitions for the
 *     sorted map output records.
 * @throws IOException If there is an error.
 */
private static void configurePartitioner(Job job, List<HFileKeyValue> startKeys)
    throws IOException {
  job.setPartitionerClass(TotalOrderPartitioner.class);
  LOG.info("Configuring " + startKeys.size() + " reduce partitions.");
  job.setNumReduceTasks(startKeys.size());
  // Write the file that the TotalOrderPartitioner reads to determine where to partition records.
  Path partitionFilePath =
      new Path(job.getWorkingDirectory(), "partitions_" + System.currentTimeMillis());
  LOG.info("Writing partition information to " + partitionFilePath);
  final FileSystem fs = partitionFilePath.getFileSystem(job.getConfiguration());
  partitionFilePath = partitionFilePath.makeQualified(fs);
  writePartitionFile(job.getConfiguration(), partitionFilePath, startKeys);
  // Add it to the distributed cache.
  try {
    final URI cacheUri =
        new URI(partitionFilePath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(cacheUri, job.getConfiguration());
  } catch (URISyntaxException e) {
    throw new IOException(e);
  }
  DistributedCache.createSymlink(job.getConfiguration());
}
/**
 * Return a list of all urls matching this input. If autocomplete is false, the list contains
 * only 1 element (same as getUrl()). Otherwise, it will try to return all the files beginning
 * with what is returned by getUrl().
 *
 * @param jobConf A Configuration object
 * @return the list of input urls
 */
public HashSet<URI> getAllUrls(Configuration jobConf) {
  HashSet<URI> urls = new HashSet<URI>();
  if (!isAutoComplete()) {
    urls.add(url);
  } else {
    Path basePath = new Path(url);
    String filePrefix = basePath.getName();
    try {
      FileSystem fs = basePath.getFileSystem(jobConf);
      if (!fs.exists(basePath.getParent())) {
        throw new IOException("Input directory not found: " + url);
      }
      FileStatus[] stats = fs.listStatus(basePath.getParent());
      for (int i = 0; i < stats.length; i++) {
        Path path = stats[i].getPath();
        if (fs.isFile(path) && path.getName().startsWith(filePrefix)) {
          urls.add(path.toUri());
        }
      }
    } catch (IOException e) {
      System.err.println("Unable to autocomplete input file");
      e.printStackTrace();
      System.exit(1);
    }
  }
  return urls;
}
/**
 * Write a partition file for the given job, using the Sampler provided. Queries the sampler for
 * a sample keyset, sorts by the output key comparator, selects the keys for each rank, and
 * writes to the destination returned from {@link TotalOrderPartitioner#getPartitionFile}.
 */
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = job.getConfiguration();
  final InputFormat inf = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
  int numPartitions = job.getNumReduceTasks();
  K[] samples = sampler.getSample(inf, job);
  RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
  Arrays.sort(samples, comparator);
  Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
  FileSystem fs = dst.getFileSystem(conf);
  if (fs.exists(dst)) {
    fs.delete(dst, false);
  }
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class);
  NullWritable nullValue = NullWritable.get();
  float stepSize = samples.length / (float) numPartitions;
  int last = -1;
  for (int i = 1; i < numPartitions; ++i) {
    int k = Math.round(stepSize * i);
    while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
      ++k;
    }
    writer.append(samples[k], nullValue);
    last = k;
  }
  writer.close();
}
public static void run(Configuration conf, Path input, String outputFile)
    throws IOException, InstantiationException, IllegalAccessException {
  Writer writer;
  if (outputFile == null) {
    writer = new OutputStreamWriter(System.out);
  } else {
    writer =
        new OutputStreamWriter(
            new FileOutputStream(new File(outputFile)), Charset.forName("UTF-8"));
  }
  try {
    FileSystem fs = input.getFileSystem(conf);
    for (FileStatus fst : fs.listStatus(input, new DataPathFilter())) {
      Path dataPath = fst.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, dataPath, conf);
      try {
        Text key = reader.getKeyClass().asSubclass(Text.class).newInstance();
        DocumentMapping value = new DocumentMapping();
        while (reader.next(key, value)) {
          String docId = value.getDocId();
          writer.write(docId + "\t" + key + "\n");
        }
      } finally {
        reader.close();
      }
    }
  } finally {
    writer.close();
  }
}
public QseqRecordReader(Configuration conf, FileSplit split) throws IOException {
  setConf(conf);
  file = split.getPath();
  start = split.getStart();
  end = start + split.getLength();
  FileSystem fs = file.getFileSystem(conf);
  FSDataInputStream fileIn = fs.open(file);
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
  CompressionCodec codec = codecFactory.getCodec(file);
  if (codec == null) { // no codec. Uncompressed file.
    positionAtFirstRecord(fileIn);
    inputStream = fileIn;
  } else { // compressed file
    if (start != 0) {
      throw new RuntimeException(
          "Start position for compressed file is not 0! (found " + start + ")");
    }
    inputStream = codec.createInputStream(fileIn);
    end = Long.MAX_VALUE; // read until the end of the file
  }
  lineReader = new LineReader(inputStream);
}
private static FileSystem fsForPath(Path dataPath, Configuration conf) {
  try {
    return dataPath.getFileSystem(conf);
  } catch (IOException ex) {
    throw new DatasetRepositoryException("Cannot get FileSystem for descriptor", ex);
  }
}
@Override
public int execute(DriverContext driverContext) {
  PrintStream out = null;
  try {
    Path resFile = new Path(work.getResFile());
    OutputStream outS = resFile.getFileSystem(conf).create(resFile);
    out = new PrintStream(outS);
    QB qb = work.getQb();
    TokenRewriteStream stream = work.getCtx().getTokenRewriteStream();
    String program = "sq rewrite";
    ASTNode ast = work.getAst();
    try {
      addRewrites(stream, qb, program, out);
      out.println(
          "\nRewritten Query:\n"
              + stream.toString(program, ast.getTokenStartIndex(), ast.getTokenStopIndex()));
    } finally {
      stream.deleteProgram(program);
    }
    out.close();
    out = null;
    return (0);
  } catch (Exception e) {
    console.printError(
        "Failed with exception " + e.getMessage(), "\n" + StringUtils.stringifyException(e));
    return (1);
  } finally {
    IOUtils.closeStream(out);
  }
}
/** Run a FileOperation */
public void map(
    Text key,
    PolicyInfo policy,
    OutputCollector<WritableComparable, Text> out,
    Reporter reporter)
    throws IOException {
  this.reporter = reporter;
  try {
    LOG.info("Raiding file=" + key.toString() + " policy=" + policy);
    Path p = new Path(key.toString());
    FileStatus fs = p.getFileSystem(jobconf).getFileStatus(p);
    st.clear();
    RaidNode.doRaid(jobconf, policy, fs, st, reporter);
    ++succeedcount;
    reporter.incrCounter(Counter.PROCESSED_BLOCKS, st.numProcessedBlocks);
    reporter.incrCounter(Counter.PROCESSED_SIZE, st.processedSize);
    reporter.incrCounter(Counter.META_BLOCKS, st.numMetaBlocks);
    reporter.incrCounter(Counter.META_SIZE, st.metaSize);
    reporter.incrCounter(Counter.FILES_SUCCEEDED, 1);
  } catch (IOException e) {
    ++failcount;
    reporter.incrCounter(Counter.FILES_FAILED, 1);
    String s = "FAIL: " + policy + ", " + key + " " + StringUtils.stringifyException(e);
    out.collect(null, new Text(s));
    LOG.info(s);
  } finally {
    reporter.setStatus(getCountString());
  }
}
/**
 * Add a {@link Path} to the list of inputs for the map-reduce job.
 *
 * @param job The {@link Job} to modify
 * @param path {@link Path} to be added to the list of inputs for the map-reduce job.
 */
public static void addInputPath(Job job, Path path) throws IOException {
  Configuration conf = job.getConfiguration();
  path = path.getFileSystem(conf).makeQualified(path);
  String dirStr = StringUtils.escapeString(path.toString());
  String dirs = conf.get(INPUT_DIR);
  conf.set(INPUT_DIR, dirs == null ? dirStr : dirs + "," + dirStr);
}
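// Hedged counterpart sketch (not part of the original snippet): decoding the escaped,
// comma-separated INPUT_DIR value written by addInputPath back into Path objects. This mirrors
// what Hadoop's FileInputFormat.getInputPaths does and assumes the same
// org.apache.hadoop.util.StringUtils class used above; the method name and its placement here are
// illustrative assumptions.
public static Path[] getInputPaths(Job job) {
  String dirs = job.getConfiguration().get(INPUT_DIR, "");
  // StringUtils.split honors the escaping applied by StringUtils.escapeString.
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}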
/**
 * Set up the input file which has the list of input files.
 *
 * @return true if at least one operation was written to the list
 * @throws IOException
 */
private boolean setup() throws IOException {
  estimateSavings();
  final String randomId = getRandomId();
  JobClient jClient = new JobClient(jobconf);
  Path jobdir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
  LOG.info(JOB_DIR_LABEL + "=" + jobdir);
  jobconf.set(JOB_DIR_LABEL, jobdir.toString());
  Path log = new Path(jobdir, "_logs");
  // The control file should have small size blocks. This helps
  // in spreading out the load from mappers that will be spawned.
  jobconf.setInt("dfs.blocks.size", OP_LIST_BLOCK_SIZE);
  FileOutputFormat.setOutputPath(jobconf, log);
  LOG.info("log=" + log);
  // create operation list
  FileSystem fs = jobdir.getFileSystem(jobconf);
  Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
  jobconf.set(OP_LIST_LABEL, opList.toString());
  int opCount = 0, synCount = 0;
  SequenceFile.Writer opWriter = null;
  try {
    opWriter =
        SequenceFile.createWriter(
            fs, jobconf, opList, Text.class, PolicyInfo.class, SequenceFile.CompressionType.NONE);
    for (RaidPolicyPathPair p : raidPolicyPathPairList) {
      // If a large set of files are Raided for the first time, files
      // in the same directory that tend to have the same size will end up
      // with the same map. This shuffle mixes things up, allowing a better
      // mix of files.
      java.util.Collections.shuffle(p.srcPaths);
      for (FileStatus st : p.srcPaths) {
        opWriter.append(new Text(st.getPath().toString()), p.policy);
        opCount++;
        if (++synCount > SYNC_FILE_MAX) {
          opWriter.sync();
          synCount = 0;
        }
      }
    }
  } finally {
    if (opWriter != null) {
      opWriter.close();
    }
    fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file
  }
  raidPolicyPathPairList.clear();
  jobconf.setInt(OP_COUNT_LABEL, opCount);
  LOG.info("Number of files=" + opCount);
  jobconf.setNumMapTasks(
      getMapCount(opCount, new JobClient(jobconf).getClusterStatus().getTaskTrackers()));
  LOG.info("jobName= " + jobName + " numMapTasks=" + jobconf.getNumMapTasks());
  return opCount != 0;
}
public static boolean runJobs(List<Jobby> jobs, HadoopDruidIndexerConfig config) {
  String failedMessage = null;
  for (Jobby job : jobs) {
    if (failedMessage == null) {
      if (!job.run()) {
        failedMessage = String.format("Job[%s] failed!", job.getClass());
      }
    }
  }
  if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) {
    if (failedMessage == null || config.getSchema().getTuningConfig().isCleanupOnFailure()) {
      Path workingPath = config.makeIntermediatePath();
      log.info("Deleting path[%s]", workingPath);
      try {
        workingPath
            .getFileSystem(injectSystemProperties(new Configuration()))
            .delete(workingPath, true);
      } catch (IOException e) {
        log.error(e, "Failed to cleanup path[%s]", workingPath);
      }
    }
  }
  if (failedMessage != null) {
    throw new ISE(failedMessage);
  }
  return true;
}
public void store(Path output) throws IOException {
  FileSystem fs = output.getFileSystem(HadoopUtils.createConfiguration());
  fs.delete(output, true);
  OutputStream os = fs.create(output, true);
  store(os);
  os.close();
}