public static void main(String[] args) throws Exception { final String NAME_NODE = "hdfs://sandbox.hortonworks.com:8020"; Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(WordCount.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(NullWritable.class); if (args.length > 2) { job.setNumReduceTasks(Integer.parseInt(args[2])); } job.setMapperClass(CountMapper.class); job.setReducerClass(CountReducer.class); job.setJarByClass(WordCount.class); job.setNumReduceTasks(1); FileInputFormat.addInputPath(job, new Path(args[0] + "data/plot_summaries.txt")); FileSystem fs = FileSystem.get(conf); // handle (e.g. delete) existing output path Path outputDestination = new Path(args[0] + args[1]); if (fs.exists(outputDestination)) { fs.delete(outputDestination, true); } // set output path & start job1 FileOutputFormat.setOutputPath(job, outputDestination); int jobCompletionStatus = job.waitForCompletion(true) ? 0 : 1; }
public void testFormat() throws Exception { localFs = FileSystem.getLocal(defaultConf); localFs.delete(workDir, true); Job job = new Job(new Configuration(defaultConf)); Path file = new Path(workDir, "test.txt"); int seed = new Random().nextInt(); Random random = new Random(seed); // for a variety of lengths for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) { // create a file with length entries Writer writer = new OutputStreamWriter(localFs.create(file)); try { MyClass mc = new MyClass(); for (int i = 0; i < length; i++) { mc.s = Integer.toString(i); mc.v = i; byte[] raw = MessagePack.pack(mc); byte[] b64e = base64_.encodeBase64(raw); byte[] b64d = base64_.decode(b64e); MyClass mc2 = MessagePack.unpack(b64d, mc.getClass()); assertEquals(mc.s, mc2.s); assertEquals(mc.v, mc2.v); writer.write(base64_.encodeToString(raw)); } } finally { writer.close(); } checkFormat(job); } }
public void inject(Path crawlDb, Path urlDir) throws IOException { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); long start = System.currentTimeMillis(); if (LOG.isInfoEnabled()) { LOG.info("Injector: starting at " + sdf.format(start)); LOG.info("Injector: crawlDb: " + crawlDb); LOG.info("Injector: urlDir: " + urlDir); } Path tempDir = new Path( getConf().get("mapred.temp.dir", ".") + "/inject-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE))); // map text input file to a <url,CrawlDatum> file if (LOG.isInfoEnabled()) { LOG.info("Injector: Converting injected urls to crawl db entries."); } JobConf sortJob = new NutchJob(getConf()); sortJob.setJobName("inject " + urlDir); FileInputFormat.addInputPath(sortJob, urlDir); sortJob.setMapperClass(InjectMapper.class); FileOutputFormat.setOutputPath(sortJob, tempDir); sortJob.setOutputFormat(SequenceFileOutputFormat.class); sortJob.setOutputKeyClass(Text.class); sortJob.setOutputValueClass(CrawlDatum.class); sortJob.setLong("injector.current.time", System.currentTimeMillis()); RunningJob mapJob = JobClient.runJob(sortJob); long urlsInjected = mapJob.getCounters().findCounter("injector", "urls_injected").getValue(); long urlsFiltered = mapJob.getCounters().findCounter("injector", "urls_filtered").getValue(); LOG.info("Injector: total number of urls rejected by filters: " + urlsFiltered); LOG.info( "Injector: total number of urls injected after normalization and filtering: " + urlsInjected); // merge with existing crawl db if (LOG.isInfoEnabled()) { LOG.info("Injector: Merging injected urls into crawl db."); } JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb); FileInputFormat.addInputPath(mergeJob, tempDir); mergeJob.setReducerClass(InjectReducer.class); JobClient.runJob(mergeJob); CrawlDb.install(mergeJob, crawlDb); // clean up FileSystem fs = FileSystem.get(getConf()); fs.delete(tempDir, true); long end = System.currentTimeMillis(); LOG.info( "Injector: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end)); }
protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); Path pt = new Path("/user/yao/query/query"); FileSystem fs = FileSystem.get(new Configuration()); BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt))); String line = br.readLine(); String[] keywords = line.split(","); k0 = keywords[0]; k1 = keywords[1]; k2 = keywords[2]; br.close(); }
private static boolean hasFile( dbutil db_util, FileSystem fs, String db_name, String file_id, String file_path) throws Exception { Get file_id_get = new Get(file_id.getBytes()); Result file_result = db_util.doGet(db_name, file_id_get); KeyValue file_names = file_result.getColumnLatest("d".getBytes(), "filenames".getBytes()); if (file_names == null) { return false; } String all_files = new String(file_names.getValue()); String[] files = all_files.split("\n"); for (String line : files) { if (line.equals(file_path)) { if (fs.globStatus(new Path(line + "*")).length == 0) { Put new_put = new Put(file_id.getBytes()); new_put.add( "d".getBytes(), "filenames".getBytes(), all_files.replace(file_path + "\n", "").getBytes()); db_util.doPut(db_name, new_put); return false; } return true; } } return false; }
public void testAbort() throws IOException { JobConf job = new JobConf(); setConfForFileOutputCommitter(job); JobContext jContext = new JobContextImpl(job, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID); FileOutputCommitter committer = new FileOutputCommitter(); FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext)); // do setup committer.setupJob(jContext); committer.setupTask(tContext); String file = "test.txt"; // A reporter that does nothing Reporter reporter = Reporter.NULL; // write output FileSystem localFs = FileSystem.getLocal(job); TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter); writeOutput(theRecordWriter, reporter); // do abort committer.abortTask(tContext); File expectedFile = new File(new Path(committer.getTempTaskOutputPath(tContext), file).toString()); assertFalse("task temp dir still exists", expectedFile.exists()); committer.abortJob(jContext, JobStatus.State.FAILED); expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME).toString()); assertFalse("job temp dir still exists", expectedFile.exists()); assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length); FileUtil.fullyDelete(new File(outDir.toString())); }
public void configure(JobConf conf) { numberOfCenters = Integer.valueOf(conf.get("numberOfCenters")); centersDirectory = conf.get("centersReadDirectory"); try { Configuration c = new Configuration(); FileSystem fs = FileSystem.get(c); for (int index = 0; index < numberOfCenters; ++index) { SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(centersDirectory + "/centers/" + index), c); LongWritable key = new LongWritable(); Point value = new Point(); reader.next(key, value); Point center = (Point) value; centers.add(center); reader.close(); } } catch (IOException e) { // do nothing // I hope this doesn't happen System.out.println("well, damn."); e.printStackTrace(); } }
@Before public void setUp() throws IOException { MapFileDemo.main(new String[] {MAP_URI}); Configuration conf = new Configuration(); fs = FileSystem.get(URI.create(MAP_URI), conf); reader = new MapFile.Reader(fs, MAP_URI, conf); key = (WritableComparable<?>) ReflectionUtils.newInstance(reader.getKeyClass(), conf); value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); }
public void testcheckOutputSpecsForbidRecordCompression() throws IOException { Job job = Job.getInstance(new Configuration(), "testcheckOutputSpecsForbidRecordCompression"); FileSystem fs = FileSystem.getLocal(job.getConfiguration()); Path outputdir = new Path(System.getProperty("test.build.data", "/tmp") + "/output"); fs.delete(outputdir, true); // Without outputpath, FileOutputFormat.checkoutputspecs will throw // InvalidJobConfException FileOutputFormat.setOutputPath(job, outputdir); // SequenceFileAsBinaryOutputFormat doesn't support record compression // It should throw an exception when checked by checkOutputSpecs SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true); SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); try { new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job); } catch (Exception e) { fail( "Block compression should be allowed for " + "SequenceFileAsBinaryOutputFormat:Caught " + e.getClass().getName()); } SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.RECORD); try { new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job); fail("Record compression should not be allowed for " + "SequenceFileAsBinaryOutputFormat"); } catch (InvalidJobConfException ie) { // expected } catch (Exception e) { fail( "Expected " + InvalidJobConfException.class.getName() + "but caught " + e.getClass().getName()); } }
public static void recursePath(Configuration conf, Path path, Job job) { try { FileSystem fs = path.getFileSystem(conf); FileStatus[] fstats = fs.listStatus(path); if (fstats != null) { for (FileStatus f : fstats) { Path p = f.getPath(); ; if (fs.isFile(p)) { // connection times out otherwise System.err.println("file:" + p.toString()); FileInputFormat.addInputPath(job, p); } else { System.err.println("dir:" + p.toString()); recursePath(conf, p, job); } } } } catch (IOException e) { // shouldn't be here throw new RuntimeException(e); } }
public static void extractQueryFeatures2HDFS(String filename, Job job) throws IOException { // Read the local image.jpg as a Mat Mat query_mat_float = Highgui.imread(LOCAL_USER_DIR + ID + INPUT + "/" + filename, CvType.CV_32FC3); // Convert RGB to GRAY Mat query_gray = new Mat(); Imgproc.cvtColor(query_mat_float, query_gray, Imgproc.COLOR_RGB2GRAY); // Convert the float type to unsigned integer(required by SIFT) Mat query_mat_byte = new Mat(); query_gray.convertTo(query_mat_byte, CvType.CV_8UC3); // // Resize the image to 1/FACTOR both width and height // Mat query_mat_byte = FeatureExtraction.resize(query_mat_byte); // Extract the feature from the (Mat)image Mat query_features = FeatureExtraction.extractFeature(query_mat_byte); System.out.println(PREFIX + "Extracting the query image feature..."); System.out.println("query_mat(float,color):" + query_mat_float); System.out.println("query_mat(float,gray):" + query_gray); System.out.println("query_mat(byte,gray):" + query_mat_byte); System.out.println("query_mat_features:" + query_features); System.out.println(); // Store the feature to the hdfs in order to use it later in different map tasks System.out.println(PREFIX + "Generating the feature file for the query image in HDFS..."); FileSystem fs = FileSystem.get(job.getConfiguration()); String featureFileName = filename.substring(0, filename.lastIndexOf(".")) + ".json"; FSDataOutputStream fsDataOutputStream = fs.create(new Path(HDFS_HOME + USER + ID + INPUT + "/" + featureFileName)); BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); bw.write(FeatureExtraction.mat2json(query_features)); bw.close(); System.out.println(PREFIX + "Query feature extraction finished..."); System.out.println(); }
@SuppressWarnings("unchecked") public void testCommitter() throws Exception { JobConf job = new JobConf(); setConfForFileOutputCommitter(job); JobContext jContext = new JobContextImpl(job, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID); FileOutputCommitter committer = new FileOutputCommitter(); FileOutputFormat.setWorkOutputPath(job, committer.getTempTaskOutputPath(tContext)); committer.setupJob(jContext); committer.setupTask(tContext); String file = "test.txt"; // A reporter that does nothing Reporter reporter = Reporter.NULL; // write output FileSystem localFs = FileSystem.getLocal(job); TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter); writeOutput(theRecordWriter, reporter); // do commit committer.commitTask(tContext); committer.commitJob(jContext); // validate output File expectedFile = new File(new Path(outDir, file).toString()); StringBuffer expectedOutput = new StringBuffer(); expectedOutput.append(key1).append('\t').append(val1).append("\n"); expectedOutput.append(val1).append("\n"); expectedOutput.append(val2).append("\n"); expectedOutput.append(key2).append("\n"); expectedOutput.append(key1).append("\n"); expectedOutput.append(key2).append('\t').append(val2).append("\n"); String output = UtilsForTests.slurp(expectedFile); assertEquals(output, expectedOutput.toString()); FileUtil.fullyDelete(new File(outDir.toString())); }
public void testFormat() throws Exception { JobConf job = new JobConf(conf); FileSystem fs = FileSystem.getLocal(conf); Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred"); Path file = new Path(dir, "test.seq"); Reporter reporter = Reporter.NULL; int seed = new Random().nextInt(); // LOG.info("seed = "+seed); Random random = new Random(seed); fs.delete(dir, true); FileInputFormat.setInputPaths(job, dir); // for a variety of lengths for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) { // LOG.info("creating; entries = " + length); // create a file with length entries SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, BytesWritable.class); try { for (int i = 0; i < length; i++) { IntWritable key = new IntWritable(i); byte[] data = new byte[random.nextInt(10)]; random.nextBytes(data); BytesWritable value = new BytesWritable(data); writer.append(key, value); } } finally { writer.close(); } // try splitting the file in a variety of sizes InputFormat<IntWritable, BytesWritable> format = new SequenceFileInputFormat<IntWritable, BytesWritable>(); IntWritable key = new IntWritable(); BytesWritable value = new BytesWritable(); for (int i = 0; i < 3; i++) { int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1; // LOG.info("splitting: requesting = " + numSplits); InputSplit[] splits = format.getSplits(job, numSplits); // LOG.info("splitting: got = " + splits.length); // check each split BitSet bits = new BitSet(length); for (int j = 0; j < splits.length; j++) { RecordReader<IntWritable, BytesWritable> reader = format.getRecordReader(splits[j], job, reporter); try { int count = 0; while (reader.next(key, value)) { // if (bits.get(key.get())) { // LOG.info("splits["+j+"]="+splits[j]+" : " + // key.get()); // LOG.info("@"+reader.getPos()); // } assertFalse("Key in multiple partitions.", bits.get(key.get())); bits.set(key.get()); count++; } // LOG.info("splits["+j+"]="+splits[j]+" count=" + // count); } finally { reader.close(); } } assertEquals("Some keys in no partition.", length, bits.cardinality()); } } }
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException { if (args.length != 2) { System.out.println("Usage: FeatureMatching ID <inputName.jpeg/inputName.jpg>"); System.exit(1); } SimpleDateFormat sdf = new SimpleDateFormat("", Locale.US); sdf.applyPattern("yyyy-MM-dd_HH-mm-ss"); String time = sdf.format(new Date()); Job job = Job.getInstance(); ID = "/" + args[0]; String filename = args[1]; filename = filename.toLowerCase(); System.out.println("current filename:" + filename); // Detect illegal username (if the username dir doesn't exist) File userPath = new File(LOCAL_USER_DIR + ID); if (!userPath.exists()) { System.out.println("Unauthorized username!!!\nExiting......"); System.exit(1); } // Preprocess the input image.jpg from local dir: /local.../user/ID/input/image.jpg // Save the features to local dir: hdfs://.../user/ID/input/image.jpg extractQueryFeatures2HDFS(filename, job); // Add the feature file to the hdfs cache String featureFileName = filename.substring(0, filename.lastIndexOf(".")) + ".json"; // job.addCacheFile(new Path(HDFS_HOME + USER + ID + INPUT + "/" + // featureFileName).toUri()); job.getConfiguration() .set("featureFilePath", HDFS_HOME + USER + ID + INPUT + "/" + featureFileName); // Check the file type. Only support jpeg/jpg type images String type = filename.substring(args[1].lastIndexOf(".")); if (!(type.equals(".jpg") || type.equals(".jpeg"))) { System.out.println("Image type not supported!!!\nExiting"); System.exit(1); } // Input: hdfs://.../features/ // The feature dir is a location of all features extracted from the database String inputPathStr = HDFS_HOME + FEATURES; // Output: hdfs://.../user/ID/output/ String outputPathStr = HDFS_HOME + USER + ID + OUTPUT + "/" + time; job.setInputFormatClass(KeyValueTextInputFormat.class); // job.setOutputFormatClass(TextOutputFormat.class); // Get the lists of all feature files: /.../features/data/part-* FileSystem fs = FileSystem.get(job.getConfiguration()); FileStatus[] statuses = fs.listStatus(new Path(inputPathStr)); StringBuffer sb = new StringBuffer(); for (FileStatus fileStatus : statuses) { sb.append(fileStatus.getPath() + ","); } sb.deleteCharAt(sb.lastIndexOf(",")); job.setJarByClass(FeatureMatching.class); job.setMapperClass(FeatureMatchMapper.class); job.setReducerClass(FeatureMatchReducer.class); // only need one reducer to collect the result job.setNumReduceTasks(1); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(DoubleWritable.class); // Input a directory, so need the recursive input FileInputFormat.setInputDirRecursive(job, true); // Set the PathFilter, to omit _SUCCESS files // (This is not working correctly, as the PathFilter class is an interface rather than a class. // But the 2nd arg asks me to extend the PathFilter) // FileInputFormat.setInputPathFilter(job, MyPathFilter.class); // // FileInputFormat.setInputPaths(job, new Path(inputPathStr)); FileInputFormat.setInputPaths(job, sb.toString()); FileOutputFormat.setOutputPath(job, new Path(outputPathStr)); boolean success = job.waitForCompletion(true); return success ? 0 : 1; }
public void map(Text key, Text value, Context context) throws InterruptedException, IOException { String filename = key.toString(); String json = value.toString(); // Make sure the input is valid if (!(filename.isEmpty() || json.isEmpty())) { // Change the json-type feature to Mat-type feature Mat descriptor = json2mat(json); if (descriptor != null) { // Read the query feature from the cache in Hadoop Mat query_features; String pathStr = context.getConfiguration().get("featureFilePath"); FileSystem fs = FileSystem.get(context.getConfiguration()); FSDataInputStream fsDataInputStream = fs.open(new Path(pathStr)); StringBuilder sb = new StringBuilder(); // Use a buffer to read the query_feature int remain = fsDataInputStream.available(); while (remain > 0) { int read; byte[] buf = new byte[BUF_SIZE]; read = fsDataInputStream.read(buf, fsDataInputStream.available() - remain, BUF_SIZE); sb.append(new String(buf, 0, read, StandardCharsets.UTF_8)); remain = remain - read; System.out.println("remain:" + remain + "\tread:" + read + "\tsb.size:" + sb.length()); } // Read the query_feature line by line // Scanner sc = new Scanner(fsDataInputStream, "UTF-8"); // StringBuilder sb = new StringBuilder(); // while (sc.hasNextLine()) { // sb.append(sc.nextLine()); // } // String query_json = sb.toString(); // String query_json = new String(buf, StandardCharsets.UTF_8); String query_json = sb.toString(); fsDataInputStream.close(); query_features = json2mat(query_json); // Get the similarity of the current database image against the query image DescriptorMatcher matcher = DescriptorMatcher.create(DescriptorMatcher.FLANNBASED); MatOfDMatch matches = new MatOfDMatch(); // Ensure the two features have same length of cols (the feature extracted are all 128 // cols(at least in this case)) if (query_features.cols() == descriptor.cols()) { matcher.match(query_features, descriptor, matches); DMatch[] dMatches = matches.toArray(); // Calculate the max/min distances // double max_dist = Double.MAX_VALUE; // double min_dist = Double.MIN_VALUE; double max_dist = 0; double min_dist = 100; for (int i = 0; i < dMatches.length; i++) { double dist = dMatches[i].distance; if (min_dist > dist) min_dist = dist; if (max_dist < dist) max_dist = dist; } // Only distances ≤ threshold are good matches double threshold = max_dist * THRESHOLD_FACTOR; // double threshold = min_dist * 2; LinkedList<DMatch> goodMatches = new LinkedList<DMatch>(); for (int i = 0; i < dMatches.length; i++) { if (dMatches[i].distance <= threshold) { goodMatches.addLast(dMatches[i]); } } // Get the ratio of good_matches to all_matches double ratio = (double) goodMatches.size() / (double) dMatches.length; System.out.println("*** current_record_filename:" + filename + " ***"); System.out.println("feature:" + descriptor + "\nquery_feature:" + query_features); System.out.println( "min_dist of keypoints:" + min_dist + " max_dist of keypoints:" + max_dist); System.out.println( "total_matches:" + dMatches.length + "\tgood_matches:" + goodMatches.size()); // System.out.println("type:" + descriptor.type() + " channels:" + // descriptor.channels() + " rows:" + descriptor.rows() + " cols:" + descriptor.cols()); // System.out.println("qtype:" + query_features.type() + " // qchannels:" + query_features.channels() + " qrows:" + query_features.rows() + " // qcols:" + query_features.cols()); System.out.println(); if (ratio > PERCENTAGE_THRESHOLD) { // Key:1 Value:filename|ratio context.write(ONE, new Text(filename + "|" + ratio)); // context.write(ONE, new Text(filename + "|" + // String.valueOf(goodMatches.size()))); } } else { System.out.println("The size of the features are not equal"); } } else { // a null pointer, do nothing System.out.println("A broken/null feature:" + filename); System.out.println(); } } }
@After public void tearDown() throws IOException { fs.delete(new Path(MAP_URI), true); }
// Information needed to get a single file: // BASE_PATH, FILE_ID, TIMESTAMP_START, TIMESTAMP_STOP, SOURCE, FILESYSTEM private static Vector<Path> getFile( FileSystem fs, Hashtable<String, String> config, dbutil db_util) throws Exception { Long latestVersion = latestVersion(config, db_util); try { config.put("timestamp_start", config.get("timestamp_start")); config.put("timestamp_real", latestVersion.toString()); config.put("timestamp_stop", latestVersion.toString()); } catch (Exception E) { logger.error("Tryign to get file that is impossible to generate: " + getFullPath(config)); return null; } if (Integer.parseInt(config.get("timestamp_start")) > Integer.parseInt(config.get("timestamp_stop"))) { return null; } logger.debug( "Getting DB for timestamp " + config.get("timestamp_start") + " to " + config.get("timestamp_stop")); String final_result = getFullPath(config); String temp_path_base = config.get("local_temp_path") + "_" + config.get("task_id") + "_" + config.get("run_id") + "/"; Path newPath = new Path(final_result + "*"); Vector<Path> ret_path = new Vector<Path>(); String lockName = lock(final_result.replaceAll("/", "_")); if (fs.globStatus(newPath).length != 0) { ret_path.add(newPath); unlock(lockName); config.put("full_file_name", final_result); return ret_path; } else { if (!config.get("source").equals("local")) { config.put("temp_path_base", temp_path_base); config.put("timestamp_start", config.get("timestamp_start")); config.put("timestamp_real", latestVersion.toString()); config.put("timestamp_stop", latestVersion.toString()); Class<?> sourceClass = Class.forName("org.gestore.plugin.source." + config.get("source") + "Source"); Method process_data = sourceClass.getMethod("process", Hashtable.class, FileSystem.class); Object processor = sourceClass.newInstance(); Object retVal; try { retVal = process_data.invoke(processor, config, fs); } catch (InvocationTargetException E) { Throwable exception = E.getTargetException(); logger.error("Unable to call method in child class: " + exception.toString()); exception.printStackTrace(System.out); unlock(lockName); return null; } FileStatus[] files = (FileStatus[]) retVal; if (files == null) { logger.error("Error getting files, no files returned"); return null; } for (FileStatus file : files) { Path cur_file = file.getPath(); Path cur_local_path = new Path(temp_path_base + config.get("file_id")); String suffix = getSuffix(config.get("file_id"), cur_file.getName()); cur_local_path = cur_local_path.suffix(suffix); Path res_path = new Path(new String(final_result + suffix)); logger.debug("Moving file" + cur_file.toString() + " to " + res_path.toString()); if (config.get("copy").equals("true")) { fs.moveFromLocalFile(cur_file, res_path); } else { fs.rename(cur_file, res_path); } } config.put("full_file_name", final_result); } } unlock(lockName); return ret_path; }
public int run(String[] args) throws Exception { // printUsage(); /* * SETUP */ Configuration argConf = getConf(); Hashtable<String, String> confArg = new Hashtable<String, String>(); setup(confArg, argConf); Date currentTime = new Date(); Date endDate = new Date(new Long(confArg.get("timestamp_stop"))); Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*"); Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*"); logger.info("Running GeStore"); // ZooKeeper setup Configuration config = HBaseConfiguration.create(); zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config)); zkInstance = new ZooKeeper( ZKConfig.getZKQuorumServersString(config), config.getInt("zookeeper.session.timeout", -1), zkWatcher); if (!confArg.get("task_id").isEmpty()) { confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id")); } String lockRequest = confArg.get("file_id"); if (!confArg.get("run_id").isEmpty()) lockRequest = lockRequest + "_" + confArg.get("run_id") + "_"; if (!confArg.get("task_id").isEmpty()) lockRequest = lockRequest + "_" + confArg.get("task_id") + "_"; // Get type of movement toFrom type_move = checkArgs(confArg); if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) { List<String> arguments = new ArrayList<String>(); arguments.add("-Dinput=" + confArg.get("local_path")); arguments.add("-Dtable=" + confArg.get("file_id")); arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop")); arguments.add("-Dtype=" + confArg.get("format")); arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id")); arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path")); arguments.add("-Drun_id=" + confArg.get("run_id")); if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id")); if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id")); if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add")); String lockName = lock(lockRequest); String[] argumentString = arguments.toArray(new String[arguments.size()]); adddb.main(argumentString); unlock(lockName); System.exit(0); } // Database registration dbutil db_util = new dbutil(config); db_util.register_database(confArg.get("db_name_files"), true); db_util.register_database(confArg.get("db_name_runs"), true); db_util.register_database(confArg.get("db_name_updates"), true); FileSystem hdfs = FileSystem.get(config); FileSystem localFS = FileSystem.getLocal(config); // Get source type confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id"))); confArg.put( "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id"))); if (!confArg.get("source").equals("local") && type_move == toFrom.REMOTE2LOCAL && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) { confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util))); } /* * Get previous timestamp */ Get run_id_get = new Get(confArg.get("run_id").getBytes()); Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get); KeyValue run_file_prev = run_get.getColumnLatest( "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes()); String last_timestamp = new String("0"); if (null != run_file_prev && !confArg.get("source").equals("local")) { long last_timestamp_real = run_file_prev.getTimestamp(); Long current_timestamp = new Long(confArg.get("timestamp_real")); if ((current_timestamp - last_timestamp_real) > 36000) { last_timestamp = new String(run_file_prev.getValue()); Integer lastTimestamp = new Integer(last_timestamp); lastTimestamp += 1; last_timestamp = lastTimestamp.toString(); logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate); Date last_run = new Date(run_file_prev.getTimestamp()); if (last_run.before(endDate) && !full_run) { confArg.put("timestamp_start", last_timestamp); } } } Integer tse = new Integer(confArg.get("timestamp_stop")); Integer tss = new Integer(confArg.get("timestamp_start")); if (tss > tse) { logger.info("No new version of requested file."); return 0; } /* * Generate file */ String lockName = lock(lockRequest); Get file_id_get = new Get(confArg.get("file_id").getBytes()); Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get); if (!file_get.isEmpty()) { boolean found = hasFile( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg)); if (confArg.get("source").equals("fullfile")) { found = false; } String filenames_put = getFileNames( db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg)); // Filename not found in file database if (!found && type_move == toFrom.REMOTE2LOCAL) { if (!confArg.get("source").equals("local")) { // Generate intermediate file if (getFile(hdfs, confArg, db_util) == null) { unlock(lockName); return 1; } // Put generated file into file database if (!confArg.get("format").equals("fullfile")) { putFileEntry( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), confArg.get("full_file_name"), confArg.get("source")); } } else { logger.warn("Remote file not found, and cannot be generated! File: " + confArg); unlock(lockName); return 1; } } } else { if (type_move == toFrom.REMOTE2LOCAL) { logger.warn("Remote file not found, and cannot be generated."); unlock(lockName); return 1; } } /* * Copy file * Update tables */ if (type_move == toFrom.LOCAL2REMOTE) { if (!confArg.get("format").equals("fullfile")) { putFileEntry( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg), confArg.get("source")); } putRunEntry( db_util, confArg.get("db_name_runs"), confArg.get("run_id"), confArg.get("file_id"), confArg.get("type"), confArg.get("timestamp_real"), confArg.get("timestamp_stop"), getFullPath(confArg), confArg.get("delimiter")); hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg))); } else if (type_move == toFrom.REMOTE2LOCAL) { FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*")); putRunEntry( db_util, confArg.get("db_name_runs"), confArg.get("run_id"), confArg.get("file_id"), confArg.get("type"), confArg.get("timestamp_real"), confArg.get("timestamp_stop"), getFullPath(confArg), confArg.get("delimiter")); unlock(lockName); for (FileStatus file : files) { Path cur_file = file.getPath(); Path cur_local_path = new Path(new String(confArg.get("local_path") + confArg.get("file_id"))); String suffix = getSuffix(getFileName(confArg), cur_file.getName()); if (suffix.length() > 0) { cur_local_path = cur_local_path.suffix(new String("." + suffix)); } if (confArg.get("copy").equals("true")) { String crc = hdfs.getFileChecksum(cur_file).toString(); if (checksumLocalTest(cur_local_path, crc)) { continue; } else { hdfs.copyToLocalFile(cur_file, cur_local_path); writeChecksum(cur_local_path, crc); } } else { System.out.println(cur_local_path + "\t" + cur_file); } } } unlock(lockName); return 0; }