private static void StartingJob()
    throws IOException, InterruptedException, ClassNotFoundException {
  conf = new Configuration();
  fs = FileSystem.get(conf);
  conf.setLong("my.vertex.num", num);
  job = Job.getInstance(conf, "Levelized Nested Dissection Starting");
  job.setJarByClass(LevNestDissectJob.class);
  job.setMapperClass(StartVertexMapper.class);
  job.setReducerClass(StartVertexReducer.class);
  in = out.suffix("/" + outPath_count);
  FileInputFormat.addInputPath(job, in);
  out_start = out.suffix("/" + outPath_start);
  if (fs.exists(out_start)) {
    fs.delete(out_start, true);
  }
  FileOutputFormat.setOutputPath(job, out_start);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(VertexWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.waitForCompletion(true);
  depth = depth == 0 ? depth + 1 : depth;
  wasStart = true;
}
private FSDataOutputStream setupOutputFile(Path path) throws IOException {
  fs = path.getFileSystem(CompatibilityUtil.getConfiguration(context));
  inputPath = path;

  // For /a/b/c.lzo, tmpIndexPath = /a/b/c.lzo.index.tmp,
  // and it is moved to realIndexPath = /a/b/c.lzo.index upon completion.
  tmpIndexPath = path.suffix(LzoIndex.LZO_TMP_INDEX_SUFFIX);
  realIndexPath = path.suffix(LzoIndex.LZO_INDEX_SUFFIX);

  // Delete the old index files if they exist.
  fs.delete(tmpIndexPath, false);
  fs.delete(realIndexPath, false);

  return fs.create(tmpIndexPath, false);
}
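// The snippet above only creates the temporary ".index.tmp" file. Below is a minimal sketch of
// the completion step its comment describes: once indexing finishes, the temporary file is
// promoted to the real ".index" path. The helper name "finishIndexFile" and the reuse of the
// tmpIndexPath/realIndexPath fields are assumptions for illustration, not the original
// indexer's code.
private void finishIndexFile(FSDataOutputStream indexOut) throws IOException {
  indexOut.close();
  // Replace any stale index; rename semantics depend on the underlying FileSystem.
  if (!fs.rename(tmpIndexPath, realIndexPath)) {
    throw new IOException("Could not rename " + tmpIndexPath + " to " + realIndexPath);
  }
}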
/**
 * Creates an output segment file and sets up the output streams to point at it. If the file
 * already exists, retries with a different filename. This is a bit nasty -- after all, {@link
 * FileOutputFormat}'s work directory concept is supposed to prevent filename clashes -- but it
 * looks like Amazon Elastic MapReduce prevents use of per-task work directories if the output of
 * a job is on S3.
 *
 * <p>TODO: Investigate this and find a better solution.
 */
private void createSegment() throws IOException {
  segmentsAttempted = 0;
  bytesWritten = 0;
  boolean success = false;

  while (!success) {
    Path path =
        workOutputPath.suffix(String.format(extensionFormat, segmentsCreated, segmentsAttempted));
    FileSystem fs = path.getFileSystem(conf);

    try {
      // The o.a.h.mapred OutputFormats overwrite existing files, whereas
      // the o.a.h.mapreduce OutputFormats don't overwrite. Bizarre...
      // Here, overwrite if progress != null, i.e. if using mapred API.
      FSDataOutputStream fsStream =
          (progress == null) ? fs.create(path, false) : fs.create(path, progress);
      byteStream = new CountingOutputStream(new BufferedOutputStream(fsStream));
      dataStream =
          new DataOutputStream(codec == null ? byteStream : codec.createOutputStream(byteStream));
      segmentsCreated++;
      logger.info("Writing to output file: {}", path);
      success = true;
    } catch (IOException e) {
      if (e.getMessage().startsWith("File already exists")) {
        logger.warn("Tried to create file {} but it already exists; retrying.", path);
        segmentsAttempted++; // retry
      } else {
        throw e;
      }
    }
  }
}
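// The retry loop above depends on FileSystem.create(path, false) refusing to overwrite an
// existing file. The standalone sketch below (local filesystem, placeholder path) illustrates
// that behavior; it is not part of the original class.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateNoOverwriteSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path p = new Path("/tmp/segment-00000-0"); // placeholder segment name
    fs.create(p, true).close(); // first writer creates (or overwrites) the file
    try {
      fs.create(p, false).close(); // second writer must pick a different name
    } catch (IOException expected) {
      System.out.println("Collision detected: " + expected.getMessage());
    }
  }
}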
/** Returns the Hdfs size of the given region in bytes. */
public long getHdfsSize(HRegionInfo info) throws IOException {
  Path tableDir =
      HTableDescriptor.getTableDir(
          FSUtils.getRootDir(hbaseConf_), Bytes.toBytes(hbaseTableName_));
  FileSystem fs = tableDir.getFileSystem(hbaseConf_);
  Path regionDir = tableDir.suffix("/" + info.getEncodedName());
  return fs.getContentSummary(regionDir).getLength();
}
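// Note on the "/" in tableDir.suffix("/" + ...): Path.suffix() appends the given string to the
// final path component, so the separator has to be supplied explicitly. For simple names this
// ends up equivalent to building a child Path directly, as the small sketch below (placeholder
// names, not original code) suggests.
import org.apache.hadoop.fs.Path;

public class SuffixVsChildSketch {
  public static void main(String[] args) {
    Path tableDir = new Path("/hbase/myTable"); // placeholder table directory
    String encodedName = "abc123"; // placeholder encoded region name

    Path viaSuffix = tableDir.suffix("/" + encodedName); // /hbase/myTable/abc123
    Path viaChild = new Path(tableDir, encodedName); // /hbase/myTable/abc123

    System.out.println(viaSuffix.equals(viaChild)); // expected: true
  }
}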
/** Test compressible {@link GridmixRecord}. */
@Test
public void testCompressibleGridmixRecord() throws IOException {
  JobConf conf = new JobConf();
  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);

  FileSystem lfs = FileSystem.getLocal(conf);
  int dataSize = 1024 * 1024 * 10; // 10 MB
  float ratio = 0.357F;

  // define the test's root temp directory
  Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp"))
          .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

  Path tempDir = new Path(rootTempDir, "TestPossiblyCompressibleGridmixRecord");
  lfs.delete(tempDir, true);

  // define a compressible GridmixRecord
  GridmixRecord record = new GridmixRecord(dataSize, 0);
  record.setCompressibility(true, ratio); // enable compression

  conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);
  org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);

  // write the record to a file
  Path recordFile = new Path(tempDir, "record");
  OutputStream outStream =
      CompressionEmulationUtil.getPossiblyCompressedOutputStream(recordFile, conf);
  DataOutputStream out = new DataOutputStream(outStream);
  record.write(out);
  out.close();
  outStream.close();

  // open the compressed stream for reading
  Path actualRecordFile = recordFile.suffix(".gz");
  InputStream in =
      CompressionEmulationUtil.getPossiblyDecompressedInputStream(actualRecordFile, conf, 0);

  // get the compressed file size
  long compressedFileSize = lfs.listStatus(actualRecordFile)[0].getLen();

  GridmixRecord recordRead = new GridmixRecord();
  recordRead.readFields(new DataInputStream(in));

  assertEquals(
      "Record size mismatch in a compressible GridmixRecord", dataSize, recordRead.getSize());
  assertTrue(
      "Failed to generate a compressible GridmixRecord",
      recordRead.getSize() > compressedFileSize);

  // check if the record can generate data with the desired compression ratio
  float seenRatio = ((float) compressedFileSize) / dataSize;
  assertEquals(
      CompressionEmulationUtil.standardizeCompressionRatio(ratio),
      CompressionEmulationUtil.standardizeCompressionRatio(seenRatio),
      1.0D);
}
private static void ResultJob()
    throws IOException, InterruptedException, ClassNotFoundException {
  /*depth = 9;
  wasError = true;*/
  conf = new Configuration();
  conf.setLong("my.vertex.num", num);
  if (isErrorOccurred) {
    conf.setBoolean("my.error.was", true);
  }
  fs = FileSystem.get(conf);
  job = Job.getInstance(conf, "Levelized Nested Dissection Result");
  job.setJarByClass(LevNestDissectJob.class);
  job.setReducerClass(LNDResultReducer.class);
  /*out = new Path(outPath == null ? (FILES_OUT + depth) : (outPath + "/" + "depth_" + depth));
  out_start = out.suffix("/" + outPath_start);*/
  if (wasError) {
    in = out.suffix("/" + outPath_count);
    MultipleInputs.addInputPath(job, in, SequenceFileInputFormat.class, StartVertexMapper.class);
    in_start = out_start;
    MultipleInputs.addInputPath(
        job, in_start, SequenceFileInputFormat.class, LNDResultMapper.class);
  }
  in_vertex = out.suffix("/" + outPath_vertex);
  MultipleInputs.addInputPath(
      job, in_vertex, SequenceFileInputFormat.class, LNDResultMapper.class);
  out = new Path(outPath == null ? (FILES_OUT + "result") : (outPath + "/" + "depth_" + "result"));
  if (fs.exists(out)) {
    fs.delete(out, true);
  }
  FileOutputFormat.setOutputPath(job, out);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.waitForCompletion(true);
}
@Override
public Path resolve(Path path) {
  CodecInfo c = getCodecInfo();
  String suffix = c != null ? "." + c.getDefaultSuffix() : "";
  if (path != null) {
    return path.suffix(suffix);
  } else if (StringUtils.hasText(suffix)) {
    return new Path(suffix);
  } else {
    return path;
  }
}
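// A worked example of what resolve() above returns in each branch, with the codec lookup
// stubbed out. The "gz" suffix and the class/method names here are placeholders for
// illustration, not part of the original store API.
import org.apache.hadoop.fs.Path;

public class ResolveSketch {
  // Stand-in for getCodecInfo().getDefaultSuffix(); assume a gzip codec is configured.
  private static String codecSuffix() {
    return "gz";
  }

  static Path resolve(Path path) {
    String suffix = codecSuffix() != null ? "." + codecSuffix() : "";
    if (path != null) {
      return path.suffix(suffix); // /data/part-0000 -> /data/part-0000.gz
    } else if (!suffix.isEmpty()) {
      return new Path(suffix); // relative ".gz" path when no base path is given
    } else {
      return path; // no path and no codec: nothing to resolve
    }
  }

  public static void main(String[] args) {
    System.out.println(resolve(new Path("/data/part-0000"))); // /data/part-0000.gz
    System.out.println(resolve(null)); // .gz
  }
}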
@Override
public RecordWriter<IntWritable, Double2DArrayWritable> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  // setup variables for image generation
  FileSystem fs = FileSystem.get(context.getConfiguration());
  Path picTempPath = FileOutputFormat.getOutputPath(context);
  fs.mkdirs(picTempPath);
  int k = context.getConfiguration().getInt("k", -1);
  Path imgPath = picTempPath.suffix("/points.png");
  if (k == -1) {
    throw new RuntimeException("k is -1");
  }
  return new PicRecordWriter(imgPath, k, context.getConfiguration());
}
/**
 * Test that {@link FileQueue} can identify a compressed file and provide readers that extract
 * uncompressed data, but only if input compression emulation is enabled.
 */
@Test
public void testFileQueueDecompression() throws IOException {
  JobConf conf = new JobConf();
  FileSystem lfs = FileSystem.getLocal(conf);
  String inputLine = "Hi Hello!";

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
  org.apache.hadoop.mapred.FileOutputFormat.setCompressOutput(conf, true);
  org.apache.hadoop.mapred.FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);

  // define the test's root temp directory
  Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp"))
          .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

  Path tempDir = new Path(rootTempDir, "TestFileQueueDecompression");
  lfs.delete(tempDir, true);

  // create a compressed file
  Path compressedFile = new Path(tempDir, "test");
  OutputStream out =
      CompressionEmulationUtil.getPossiblyCompressedOutputStream(compressedFile, conf);
  BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
  writer.write(inputLine);
  writer.close();

  compressedFile = compressedFile.suffix(".gz");

  // now read back the data from the compressed stream using FileQueue
  long fileSize = lfs.listStatus(compressedFile)[0].getLen();
  CombineFileSplit split =
      new CombineFileSplit(new Path[] {compressedFile}, new long[] {fileSize});
  FileQueue queue = new FileQueue(split, conf);
  byte[] bytes = new byte[inputLine.getBytes().length];
  queue.read(bytes);
  queue.close();
  String readLine = new String(bytes);
  assertEquals("Compression/Decompression error", inputLine, readLine);
}
/**
 * Test {@link CompressionEmulationUtil#getPossiblyDecompressedInputStream(Path, Configuration,
 * long)} and {@link CompressionEmulationUtil#getPossiblyCompressedOutputStream(Path,
 * Configuration)}.
 */
@Test
public void testPossiblyCompressedDecompressedStreams() throws IOException {
  JobConf conf = new JobConf();
  FileSystem lfs = FileSystem.getLocal(conf);
  String inputLine = "Hi Hello!";

  CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
  CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true);
  conf.setBoolean(FileOutputFormat.COMPRESS, true);
  conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class);

  // define the test's root temp directory
  Path rootTempDir =
      new Path(System.getProperty("test.build.data", "/tmp"))
          .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());

  Path tempDir = new Path(rootTempDir, "TestPossiblyCompressedDecompressedStreams");
  lfs.delete(tempDir, true);

  // create a compressed file
  Path compressedFile = new Path(tempDir, "test");
  OutputStream out =
      CompressionEmulationUtil.getPossiblyCompressedOutputStream(compressedFile, conf);
  BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
  writer.write(inputLine);
  writer.close();

  // now read back the data from the compressed stream
  compressedFile = compressedFile.suffix(".gz");
  InputStream in =
      CompressionEmulationUtil.getPossiblyDecompressedInputStream(compressedFile, conf, 0);
  BufferedReader reader = new BufferedReader(new InputStreamReader(in));
  String readLine = reader.readLine();
  assertEquals("Compression/Decompression error", inputLine, readLine);
  reader.close();
}
private static void DissectionJob()
    throws IOException, InterruptedException, ClassNotFoundException {
  conf = new Configuration();
  fs = FileSystem.get(conf);
  conf.set("my.out.path.vertex", outPath_vertex);
  conf.set("my.out.path.count", outPath_count);
  conf.setLong("my.vertex.num", num);
  job = Job.getInstance(conf, "Levelized Nested Dissection " + depth);
  job.setJarByClass(LevNestDissectJob.class);
  job.setReducerClass(LevNestDissectReducer.class);
  if (wasStart) {
    in_start = out_start;
    MultipleInputs.addInputPath(
        job, in_start, SequenceFileInputFormat.class, LevNestDissectMapper.class);
  }
  in_vertex = out.suffix("/" + outPath_vertex);
  MultipleInputs.addInputPath(
      job, in_vertex, SequenceFileInputFormat.class, LevNestDissectMapper.class);
  out = new Path(outPath == null ? (FILES_OUT + depth) : (outPath + "/" + "depth_" + depth));
  if (fs.exists(out)) {
    fs.delete(out, true);
  }
  FileOutputFormat.setOutputPath(job, out);
  MultipleOutputs.addNamedOutput(
      job, "vertex", SequenceFileOutputFormat.class, LongWritable.class, VertexWritable.class);
  MultipleOutputs.addNamedOutput(
      job, "count", SequenceFileOutputFormat.class, LongWritable.class, LongWritable.class);
  job.setMapOutputValueClass(VertexWritable.class);
  job.waitForCompletion(true);
  updated =
      job.getCounters().findCounter(LevNestDissectReducer.UpdatedCounter.UPDATED).getValue();
  // Workaround for an unexplained error
  if (notNumbered > 0 && updated == 0) {
    notNumbered_tmp =
        job.getCounters()
            .findCounter(LevNestDissectReducer.NotNumberedCounter.NOT_NUMBERED)
            .getValue();
    if (notNumbered_tmp > 0) {
      notNumbered = notNumbered_tmp;
      wasError = false;
      nextDepth();
    } else {
      wasError = true;
      isErrorOccurred = true;
      depth--;
      out =
          new Path(
              outPath == null
                  ? (FILES_OUT + (depth - 1))
                  : (outPath + "/" + "depth_" + (depth - 1)));
      /*depth -= 2;
      out = new Path(outPath == null ? (FILES_OUT + depth) : (outPath + "/" + "depth_" + depth));*/
    }
  } else {
    wasError = false;
    notNumbered =
        job.getCounters()
            .findCounter(LevNestDissectReducer.NotNumberedCounter.NOT_NUMBERED)
            .getValue();
    if (notNumbered > 0) {
      nextDepth();
    }
  }
}
public void splitLog(final List<ServerName> serverNames) throws IOException {
  long splitTime = 0, splitLogSize = 0;
  List<Path> logDirs = new ArrayList<Path>();
  for (ServerName serverName : serverNames) {
    Path logDir = new Path(this.rootdir, HLog.getHLogDirectoryName(serverName.toString()));
    Path splitDir = logDir.suffix(HLog.SPLITTING_EXT);
    // rename the directory so a rogue RS doesn't create more HLogs
    if (fs.exists(logDir)) {
      if (!this.fs.rename(logDir, splitDir)) {
        throw new IOException("Failed fs.rename for log split: " + logDir);
      }
      logDir = splitDir;
      LOG.debug("Renamed region directory: " + splitDir);
    } else if (!fs.exists(splitDir)) {
      LOG.info("Log dir for server " + serverName + " does not exist");
      continue;
    }
    logDirs.add(splitDir);
  }
  if (logDirs.isEmpty()) {
    LOG.info("No logs to split");
    return;
  }
  if (distributedLogSplitting) {
    splitLogManager.handleDeadWorkers(serverNames);
    splitTime = EnvironmentEdgeManager.currentTimeMillis();
    splitLogSize = splitLogManager.splitLogDistributed(logDirs);
    splitTime = EnvironmentEdgeManager.currentTimeMillis() - splitTime;
  } else {
    for (Path logDir : logDirs) {
      // splitLogLock ensures that dead region servers' logs are processed
      // one at a time
      this.splitLogLock.lock();
      try {
        HLogSplitter splitter =
            HLogSplitter.createLogSplitter(conf, rootdir, logDir, oldLogDir, this.fs);
        try {
          // If FS is in safe mode, just wait till out of it.
          FSUtils.waitOnSafeMode(conf, conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 1000));
          splitter.splitLog();
        } catch (OrphanHLogAfterSplitException e) {
          LOG.warn("Retrying splitting because of:", e);
          // An HLogSplitter instance can only be used once. Get new instance.
          splitter = HLogSplitter.createLogSplitter(conf, rootdir, logDir, oldLogDir, this.fs);
          splitter.splitLog();
        }
        splitTime = splitter.getTime();
        splitLogSize = splitter.getSize();
      } finally {
        this.splitLogLock.unlock();
      }
    }
  }
  if (this.metrics != null) {
    this.metrics.addSplit(splitTime, splitLogSize);
  }
}
public int run(String[] args) throws Exception {
  // printUsage();

  /*
   * SETUP
   */
  Configuration argConf = getConf();
  Hashtable<String, String> confArg = new Hashtable<String, String>();
  setup(confArg, argConf);
  Date currentTime = new Date();
  Date endDate = new Date(new Long(confArg.get("timestamp_stop")));
  Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*");
  Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*");
  logger.info("Running GeStore");

  // ZooKeeper setup
  Configuration config = HBaseConfiguration.create();
  zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config));
  zkInstance =
      new ZooKeeper(
          ZKConfig.getZKQuorumServersString(config),
          config.getInt("zookeeper.session.timeout", -1),
          zkWatcher);

  if (!confArg.get("task_id").isEmpty()) {
    confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id"));
  }

  String lockRequest = confArg.get("file_id");
  if (!confArg.get("run_id").isEmpty())
    lockRequest = lockRequest + "_" + confArg.get("run_id") + "_";
  if (!confArg.get("task_id").isEmpty())
    lockRequest = lockRequest + "_" + confArg.get("task_id") + "_";

  // Get type of movement
  toFrom type_move = checkArgs(confArg);
  if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) {
    List<String> arguments = new ArrayList<String>();
    arguments.add("-Dinput=" + confArg.get("local_path"));
    arguments.add("-Dtable=" + confArg.get("file_id"));
    arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop"));
    arguments.add("-Dtype=" + confArg.get("format"));
    arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id"));
    arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path"));
    arguments.add("-Drun_id=" + confArg.get("run_id"));
    if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id"));
    if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id"));
    if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add"));
    String lockName = lock(lockRequest);
    String[] argumentString = arguments.toArray(new String[arguments.size()]);
    adddb.main(argumentString);
    unlock(lockName);
    System.exit(0);
  }

  // Database registration
  dbutil db_util = new dbutil(config);
  db_util.register_database(confArg.get("db_name_files"), true);
  db_util.register_database(confArg.get("db_name_runs"), true);
  db_util.register_database(confArg.get("db_name_updates"), true);
  FileSystem hdfs = FileSystem.get(config);
  FileSystem localFS = FileSystem.getLocal(config);

  // Get source type
  confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
  confArg.put(
      "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id")));
  if (!confArg.get("source").equals("local")
      && type_move == toFrom.REMOTE2LOCAL
      && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) {
    confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util)));
  }

  /*
   * Get previous timestamp
   */
  Get run_id_get = new Get(confArg.get("run_id").getBytes());
  Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get);
  KeyValue run_file_prev =
      run_get.getColumnLatest(
          "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes());
  String last_timestamp = new String("0");
  if (null != run_file_prev && !confArg.get("source").equals("local")) {
    long last_timestamp_real = run_file_prev.getTimestamp();
    Long current_timestamp = new Long(confArg.get("timestamp_real"));
    if ((current_timestamp - last_timestamp_real) > 36000) {
      last_timestamp = new String(run_file_prev.getValue());
      Integer lastTimestamp = new Integer(last_timestamp);
      lastTimestamp += 1;
      last_timestamp = lastTimestamp.toString();
      logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate);
      Date last_run = new Date(run_file_prev.getTimestamp());
      if (last_run.before(endDate) && !full_run) {
        confArg.put("timestamp_start", last_timestamp);
      }
    }
  }

  Integer tse = new Integer(confArg.get("timestamp_stop"));
  Integer tss = new Integer(confArg.get("timestamp_start"));
  if (tss > tse) {
    logger.info("No new version of requested file.");
    return 0;
  }

  /*
   * Generate file
   */
  String lockName = lock(lockRequest);

  Get file_id_get = new Get(confArg.get("file_id").getBytes());
  Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get);
  if (!file_get.isEmpty()) {
    boolean found =
        hasFile(
            db_util,
            hdfs,
            confArg.get("db_name_files"),
            confArg.get("file_id"),
            getFullPath(confArg));
    if (confArg.get("source").equals("fullfile")) {
      found = false;
    }
    String filenames_put =
        getFileNames(
            db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg));
    // Filename not found in file database
    if (!found && type_move == toFrom.REMOTE2LOCAL) {
      if (!confArg.get("source").equals("local")) {
        // Generate intermediate file
        if (getFile(hdfs, confArg, db_util) == null) {
          unlock(lockName);
          return 1;
        }
        // Put generated file into file database
        if (!confArg.get("format").equals("fullfile")) {
          putFileEntry(
              db_util,
              hdfs,
              confArg.get("db_name_files"),
              confArg.get("file_id"),
              confArg.get("full_file_name"),
              confArg.get("source"));
        }
      } else {
        logger.warn("Remote file not found, and cannot be generated! File: " + confArg);
        unlock(lockName);
        return 1;
      }
    }
  } else {
    if (type_move == toFrom.REMOTE2LOCAL) {
      logger.warn("Remote file not found, and cannot be generated.");
      unlock(lockName);
      return 1;
    }
  }

  /*
   * Copy file
   * Update tables
   */
  if (type_move == toFrom.LOCAL2REMOTE) {
    if (!confArg.get("format").equals("fullfile")) {
      putFileEntry(
          db_util,
          hdfs,
          confArg.get("db_name_files"),
          confArg.get("file_id"),
          getFullPath(confArg),
          confArg.get("source"));
    }
    putRunEntry(
        db_util,
        confArg.get("db_name_runs"),
        confArg.get("run_id"),
        confArg.get("file_id"),
        confArg.get("type"),
        confArg.get("timestamp_real"),
        confArg.get("timestamp_stop"),
        getFullPath(confArg),
        confArg.get("delimiter"));
    hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg)));
  } else if (type_move == toFrom.REMOTE2LOCAL) {
    FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*"));
    putRunEntry(
        db_util,
        confArg.get("db_name_runs"),
        confArg.get("run_id"),
        confArg.get("file_id"),
        confArg.get("type"),
        confArg.get("timestamp_real"),
        confArg.get("timestamp_stop"),
        getFullPath(confArg),
        confArg.get("delimiter"));
    unlock(lockName);
    for (FileStatus file : files) {
      Path cur_file = file.getPath();
      Path cur_local_path =
          new Path(new String(confArg.get("local_path") + confArg.get("file_id")));
      String suffix = getSuffix(getFileName(confArg), cur_file.getName());
      if (suffix.length() > 0) {
        cur_local_path = cur_local_path.suffix(new String("." + suffix));
      }
      if (confArg.get("copy").equals("true")) {
        String crc = hdfs.getFileChecksum(cur_file).toString();
        if (checksumLocalTest(cur_local_path, crc)) {
          continue;
        } else {
          hdfs.copyToLocalFile(cur_file, cur_local_path);
          writeChecksum(cur_local_path, crc);
        }
      } else {
        System.out.println(cur_local_path + "\t" + cur_file);
      }
    }
  }
  unlock(lockName);
  return 0;
}
// Information needed to get a single file:
// BASE_PATH, FILE_ID, TIMESTAMP_START, TIMESTAMP_STOP, SOURCE, FILESYSTEM
private static Vector<Path> getFile(
    FileSystem fs, Hashtable<String, String> config, dbutil db_util) throws Exception {
  Long latestVersion = latestVersion(config, db_util);
  try {
    config.put("timestamp_start", config.get("timestamp_start"));
    config.put("timestamp_real", latestVersion.toString());
    config.put("timestamp_stop", latestVersion.toString());
  } catch (Exception E) {
    logger.error("Trying to get file that is impossible to generate: " + getFullPath(config));
    return null;
  }
  if (Integer.parseInt(config.get("timestamp_start"))
      > Integer.parseInt(config.get("timestamp_stop"))) {
    return null;
  }
  logger.debug(
      "Getting DB for timestamp "
          + config.get("timestamp_start")
          + " to "
          + config.get("timestamp_stop"));

  String final_result = getFullPath(config);
  String temp_path_base =
      config.get("local_temp_path")
          + "_"
          + config.get("task_id")
          + "_"
          + config.get("run_id")
          + "/";
  Path newPath = new Path(final_result + "*");
  Vector<Path> ret_path = new Vector<Path>();
  String lockName = lock(final_result.replaceAll("/", "_"));
  if (fs.globStatus(newPath).length != 0) {
    ret_path.add(newPath);
    unlock(lockName);
    config.put("full_file_name", final_result);
    return ret_path;
  } else {
    if (!config.get("source").equals("local")) {
      config.put("temp_path_base", temp_path_base);
      config.put("timestamp_start", config.get("timestamp_start"));
      config.put("timestamp_real", latestVersion.toString());
      config.put("timestamp_stop", latestVersion.toString());

      Class<?> sourceClass =
          Class.forName("org.gestore.plugin.source." + config.get("source") + "Source");
      Method process_data = sourceClass.getMethod("process", Hashtable.class, FileSystem.class);
      Object processor = sourceClass.newInstance();
      Object retVal;
      try {
        retVal = process_data.invoke(processor, config, fs);
      } catch (InvocationTargetException E) {
        Throwable exception = E.getTargetException();
        logger.error("Unable to call method in child class: " + exception.toString());
        exception.printStackTrace(System.out);
        unlock(lockName);
        return null;
      }
      FileStatus[] files = (FileStatus[]) retVal;
      if (files == null) {
        logger.error("Error getting files, no files returned");
        return null;
      }

      for (FileStatus file : files) {
        Path cur_file = file.getPath();
        Path cur_local_path = new Path(temp_path_base + config.get("file_id"));
        String suffix = getSuffix(config.get("file_id"), cur_file.getName());
        cur_local_path = cur_local_path.suffix(suffix);
        Path res_path = new Path(new String(final_result + suffix));
        logger.debug("Moving file " + cur_file.toString() + " to " + res_path.toString());
        if (config.get("copy").equals("true")) {
          fs.moveFromLocalFile(cur_file, res_path);
        } else {
          fs.rename(cur_file, res_path);
        }
      }

      config.put("full_file_name", final_result);
    }
  }
  unlock(lockName);
  return ret_path;
}
/** The main entry point if this class is called as a {@link Tool}. */
@Override
public int run(String[] args) throws Exception {
  Path inputPath = null;
  Path outputPath = null;
  Configuration conf = getConf();

  // retrieve our paths from the configuration
  inputPath = new Path(conf.get(Util.CONF_LOGDATA_PATH));
  outputPath = new Path(conf.get(Util.CONF_CACHING_SIMULATOR_PATH));

  final int numCores = conf.getInt(Util.CONF_NUM_CORES, Util.DEFAULT_NUM_CORES);
  final int numNodes = conf.getInt(Util.CONF_NUM_NODES, Util.DEFAULT_NUM_NODES);
  NUM_OF_REDUCE_TASKS = numCores * numNodes;

  // set the job name
  String jobName =
      Util.JOB_NAME
          + " ["
          + CachingTool.ACTION
          + "] {logdata="
          + inputPath.getName()
          + ", session="
          + conf.get(Util.CONF_SESSION_DURATION)
          + "}";
  Util.showStatus("Running " + jobName);

  conf.set("hadoop.job.ugi", Util.HADOOP_USER);
  conf.set("mapred.child.java.opts", "-Xmx1500M -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode");
  conf.set("mapred.task.timeout", "1800000");
  conf.set("mapred.map.tasks.speculative.execution", "false");
  conf.set("mapred.reduce.tasks.speculative.execution", "false");

  FileSystem fs = FileSystem.get(conf);
  Job job = new Job(conf, jobName);

  // set number of reduce tasks
  job.setNumReduceTasks(NUM_OF_REDUCE_TASKS);

  // set mapper, reducer, partitioner and grouping comparator
  job.setJarByClass(CachingTool.class);
  job.setMapperClass(CachingMapper.class);
  job.setReducerClass(CachingReducer.class);
  // GroupingComparator used for secondary sort
  job.setGroupingComparatorClass(TextPair.FirstComparator.class);
  job.setPartitionerClass(TextPair.FirstPartitioner.class);

  job.setOutputKeyClass(TextPair.class);
  job.setOutputValueClass(Text.class);

  // set input and output format
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.setMaxInputSplitSize(job, Util.DATASET_MB_SPLIT * 25);
  FileInputFormat.setMinInputSplitSize(job, Util.DATASET_MB_SPLIT * 25);

  // add input path subdirectories if there are any
  ArrayList<Path> inputPaths = Util.getInputDirectories(fs, inputPath);
  int pathsAdded = 0;
  if (inputPaths.size() > 0) {
    for (Path p : inputPaths) {
      if (!p.getName().contains(".") && !p.getName().contains("_")) {
        Util.showStatus("Adding input paths " + p);
        FileInputFormat.addInputPath(job, p);
        pathsAdded++;
      }
    }
  }
  if (pathsAdded == 0) {
    Util.showStatus("Adding input path " + inputPath);
    FileInputFormat.addInputPath(job, inputPath);
  }

  // clear output dir
  fs.delete(outputPath.suffix("/" + conf.get(Util.CONF_SESSION_DURATION)), true);
  FileOutputFormat.setOutputPath(
      job, outputPath.suffix("/" + conf.get(Util.CONF_SESSION_DURATION)));

  // run the job and wait for it to be completed
  boolean b = job.waitForCompletion(true);
  // NOTE! The counters will be written HERE

  // retrieve the counters
  Counter numNewInCache = job.getCounters().findCounter(CachingReducer.CacheCounter.NEW_TO_CACHE);
  Counter numRenewCache = job.getCounters().findCounter(CachingReducer.CacheCounter.RENEW_CACHE);
  Counter numUsedFromCache =
      job.getCounters().findCounter(CachingReducer.CacheCounter.USED_FROM_CACHE);

  // write the counters to the metadata file
  Path headerPath = new Path(conf.get(Util.CONF_CACHING_SIMULATOR_PATH));
  FSDataOutputStream out =
      fs.create(headerPath.suffix("/" + DataSetHeader.SIMULATE_CACHING_METADATA_FILE));
  PrintWriter w = new PrintWriter(out);
  // the sum of all counters equals the sum of all queries in the log file
  w.println("hostnametypeAddedToCache=" + numNewInCache.getValue());
  w.println("queriesAddedAgainToCache=" + numRenewCache.getValue());
  w.println("queriesAnsweredFromCache=" + numUsedFromCache.getValue());
  w.close();
  out.close();

  // Delete all empty output files
  Util.deleteEmptyFiles(fs, outputPath.suffix("/" + conf.get(Util.CONF_SESSION_DURATION)));

  return b ? 1 : 0;
}