public static void main(String[] args) throws Exception { String inputDirectory = "/home/cs246/Desktop/HW2/input"; String outputDirectory = "/home/cs246/Desktop/HW2/output"; String centroidDirectory = "/home/cs246/Desktop/HW2/config"; int iterations = 20; for (int i = 1; i <= iterations; i++) { Configuration conf = new Configuration(); String cFile = centroidDirectory + "/c" + i + ".txt"; String nextCFile = centroidDirectory + "/c" + (i + 1) + ".txt"; conf.set("CFILE", cFile); conf.set("NEXTCFILE", nextCFile); String cFile = centroidDirectory + "/c" + i + ".txt"; String nextCFile = centroidDirectory + "/c" + (i + 1) + ".txt"; conf.set("CFILE", cFile); conf.set("NEXTCFILE", nextCFile); Job job = new Job(conf, "HW2_Q4." + i); job.setJarByClass(HW2_Q4.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); job.setMapperClass(Map1.class); job.setReducerClass(Reduce1.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, new Path(inputDirectory)); FileOutputFormat.setOutputPath(job, new Path(outputDirectory + "/output" + i)); job.waitForCompletion(true); } }
public int run(String[] args) throws Exception { Configuration argConf = getConf(); // JobConf conf = new JobConf(diffdb.class); Configuration config = HBaseConfiguration.create(); HBaseAdmin hbAdmin = new HBaseAdmin(config); dbutil db_util = new dbutil(config); HTable runTable = new HTable(config, "gestore_runs"); Get runGet = new Get(argConf.get("id").getBytes()); Result pipeline = runTable.get(runGet); NavigableMap<byte[], byte[]> pipeMap = pipeline.getFamilyMap("d".getBytes()); Map.Entry<byte[], byte[]> results = pipeMap.pollFirstEntry(); HashMap<String, HashMap<String, String>> resultMap = new HashMap<String, HashMap<String, String>>(); while (results != null) { String resultKey = new String(results.getKey()); String resultValue = new String(results.getValue()); String field = "type"; HashMap<String, String> tempMap = new HashMap<String, String>(); String entry = resultKey; if (resultKey.endsWith("_db_timestamp")) { field = "db_timestamp"; entry = resultKey.substring(0, resultKey.lastIndexOf("_db_timestamp")); } else if (resultKey.endsWith("_filename")) { field = "filename"; entry = resultKey.substring(0, resultKey.lastIndexOf("_filename")); } else if (resultKey.endsWith("_regex")) { field = "regex"; entry = resultKey.substring(0, resultKey.lastIndexOf("_regex")); } if (resultMap.containsKey(entry)) { tempMap = resultMap.get(entry); } tempMap.put(field, resultValue); resultMap.put(entry, tempMap); // System.out.println("Key: " + resultKey + " Value: " + resultValue); results = pipeMap.pollFirstEntry(); } for (String key : resultMap.keySet()) { System.out.println("File ID: " + key); for (String subKey : resultMap.get(key).keySet()) { // System.out.println("\t " + subKey + "\t\t" + resultMap.get(key).get(subKey)); System.out.format(" %1$-20s %2$s\n", subKey, resultMap.get(key).get(subKey)); } } return 0; }
private static String getConfigString(Configuration config) { String output = ""; Iterator<Map.Entry<String, String>> iterConfig = config.iterator(); while (iterConfig.hasNext()) { Map.Entry<String, String> curEntry = iterConfig.next(); output = output + "Key: \t" + curEntry.getKey() + "\nValue: \t" + curEntry.getValue() + "\n"; } return output; }
/** * @param args * @throws IOException * @throws ClassNotFoundException * @throws InterruptedException */ public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { // TODO Auto-generated method stub Configuration conf = new Configuration(); conf.set("inParameter", toString(args)); Job job = new Job(conf, "MovieGenreIdentifier"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setJarByClass(MovieGenreIdentifier.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); }
@Override public int run(String[] args) throws Exception { String locatorHost = args[0]; int locatorPort = Integer.parseInt(args[1]); String hdfsHomeDir = args[2]; System.out.println( "KnownKeysMRv2 invoked with args (locatorHost = " + locatorHost + " locatorPort = " + locatorPort + " hdfsHomeDir = " + hdfsHomeDir); Configuration conf = getConf(); conf.set(GFInputFormat.INPUT_REGION, "partitionedRegion"); conf.set(GFInputFormat.HOME_DIR, hdfsHomeDir); conf.setBoolean(GFInputFormat.CHECKPOINT, false); conf.set(GFOutputFormat.REGION, "validationRegion"); conf.set(GFOutputFormat.LOCATOR_HOST, locatorHost); conf.setInt(GFOutputFormat.LOCATOR_PORT, locatorPort); Job job = Job.getInstance(conf, "knownKeysMRv2"); job.setInputFormatClass(GFInputFormat.class); job.setOutputFormatClass(GFOutputFormat.class); job.setMapperClass(KnownKeysMRv2Mapper.class); job.setMapOutputKeyClass(GFKey.class); job.setMapOutputValueClass(PEIWritable.class); job.setReducerClass(KnownKeysMRv2Reducer.class); // job.setOutputKeyClass(String.class); // job.setOutputValueClass(ValueHolder.class); return job.waitForCompletion(false) ? 0 : 1; }
protected void setup(Context context) { Configuration config = context.getConfiguration(); inMovies = config.get("inParameter"); }
public int run(String[] args) throws Exception { // printUsage(); /* * SETUP */ Configuration argConf = getConf(); Hashtable<String, String> confArg = new Hashtable<String, String>(); setup(confArg, argConf); Date currentTime = new Date(); Date endDate = new Date(new Long(confArg.get("timestamp_stop"))); Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*"); Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*"); logger.info("Running GeStore"); // ZooKeeper setup Configuration config = HBaseConfiguration.create(); zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config)); zkInstance = new ZooKeeper( ZKConfig.getZKQuorumServersString(config), config.getInt("zookeeper.session.timeout", -1), zkWatcher); if (!confArg.get("task_id").isEmpty()) { confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id")); } String lockRequest = confArg.get("file_id"); if (!confArg.get("run_id").isEmpty()) lockRequest = lockRequest + "_" + confArg.get("run_id") + "_"; if (!confArg.get("task_id").isEmpty()) lockRequest = lockRequest + "_" + confArg.get("task_id") + "_"; // Get type of movement toFrom type_move = checkArgs(confArg); if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) { List<String> arguments = new ArrayList<String>(); arguments.add("-Dinput=" + confArg.get("local_path")); arguments.add("-Dtable=" + confArg.get("file_id")); arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop")); arguments.add("-Dtype=" + confArg.get("format")); arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id")); arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path")); arguments.add("-Drun_id=" + confArg.get("run_id")); if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id")); if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id")); if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add")); String lockName = lock(lockRequest); String[] argumentString = arguments.toArray(new String[arguments.size()]); adddb.main(argumentString); unlock(lockName); System.exit(0); } // Database registration dbutil db_util = new dbutil(config); db_util.register_database(confArg.get("db_name_files"), true); db_util.register_database(confArg.get("db_name_runs"), true); db_util.register_database(confArg.get("db_name_updates"), true); FileSystem hdfs = FileSystem.get(config); FileSystem localFS = FileSystem.getLocal(config); // Get source type confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id"))); confArg.put( "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id"))); if (!confArg.get("source").equals("local") && type_move == toFrom.REMOTE2LOCAL && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) { confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util))); } /* * Get previous timestamp */ Get run_id_get = new Get(confArg.get("run_id").getBytes()); Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get); KeyValue run_file_prev = run_get.getColumnLatest( "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes()); String last_timestamp = new String("0"); if (null != run_file_prev && !confArg.get("source").equals("local")) { long last_timestamp_real = run_file_prev.getTimestamp(); Long current_timestamp = new Long(confArg.get("timestamp_real")); if ((current_timestamp - last_timestamp_real) > 36000) { last_timestamp = new String(run_file_prev.getValue()); Integer lastTimestamp = new Integer(last_timestamp); lastTimestamp += 1; last_timestamp = lastTimestamp.toString(); logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate); Date last_run = new Date(run_file_prev.getTimestamp()); if (last_run.before(endDate) && !full_run) { confArg.put("timestamp_start", last_timestamp); } } } Integer tse = new Integer(confArg.get("timestamp_stop")); Integer tss = new Integer(confArg.get("timestamp_start")); if (tss > tse) { logger.info("No new version of requested file."); return 0; } /* * Generate file */ String lockName = lock(lockRequest); Get file_id_get = new Get(confArg.get("file_id").getBytes()); Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get); if (!file_get.isEmpty()) { boolean found = hasFile( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg)); if (confArg.get("source").equals("fullfile")) { found = false; } String filenames_put = getFileNames( db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg)); // Filename not found in file database if (!found && type_move == toFrom.REMOTE2LOCAL) { if (!confArg.get("source").equals("local")) { // Generate intermediate file if (getFile(hdfs, confArg, db_util) == null) { unlock(lockName); return 1; } // Put generated file into file database if (!confArg.get("format").equals("fullfile")) { putFileEntry( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), confArg.get("full_file_name"), confArg.get("source")); } } else { logger.warn("Remote file not found, and cannot be generated! File: " + confArg); unlock(lockName); return 1; } } } else { if (type_move == toFrom.REMOTE2LOCAL) { logger.warn("Remote file not found, and cannot be generated."); unlock(lockName); return 1; } } /* * Copy file * Update tables */ if (type_move == toFrom.LOCAL2REMOTE) { if (!confArg.get("format").equals("fullfile")) { putFileEntry( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg), confArg.get("source")); } putRunEntry( db_util, confArg.get("db_name_runs"), confArg.get("run_id"), confArg.get("file_id"), confArg.get("type"), confArg.get("timestamp_real"), confArg.get("timestamp_stop"), getFullPath(confArg), confArg.get("delimiter")); hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg))); } else if (type_move == toFrom.REMOTE2LOCAL) { FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*")); putRunEntry( db_util, confArg.get("db_name_runs"), confArg.get("run_id"), confArg.get("file_id"), confArg.get("type"), confArg.get("timestamp_real"), confArg.get("timestamp_stop"), getFullPath(confArg), confArg.get("delimiter")); unlock(lockName); for (FileStatus file : files) { Path cur_file = file.getPath(); Path cur_local_path = new Path(new String(confArg.get("local_path") + confArg.get("file_id"))); String suffix = getSuffix(getFileName(confArg), cur_file.getName()); if (suffix.length() > 0) { cur_local_path = cur_local_path.suffix(new String("." + suffix)); } if (confArg.get("copy").equals("true")) { String crc = hdfs.getFileChecksum(cur_file).toString(); if (checksumLocalTest(cur_local_path, crc)) { continue; } else { hdfs.copyToLocalFile(cur_file, cur_local_path); writeChecksum(cur_local_path, crc); } } else { System.out.println(cur_local_path + "\t" + cur_file); } } } unlock(lockName); return 0; }
/** Sets up configuration based on params */ private static boolean setup(Hashtable<String, String> curConf, Configuration argConf) { if (argConf.get("file") == null) { logger.fatal("Missing file parameter"); System.exit(1); } if (argConf.get("hdfs_base_path") == null) { logger.fatal("Missing HDFS base path, check gestore-conf.xml"); System.exit(1); } if (argConf.get("hdfs_temp_path") == null) { logger.fatal("Missing HDFS temp path, check gestore-conf.xml"); System.exit(1); } if (argConf.get("local_temp_path") == null) { logger.fatal("Missing local temp path, check gestore-conf.xml"); System.exit(1); } // Input paramaters curConf.put("run_id", argConf.get("run", "")); curConf.put("task_id", argConf.get("task", "")); curConf.put("file_id", argConf.get("file")); curConf.put("local_path", argConf.get("path", "")); curConf.put("type", argConf.get("type", "l2r")); curConf.put("timestamp_start", argConf.get("timestamp_start", "1")); curConf.put( "timestamp_stop", argConf.get("timestamp_stop", Integer.toString(Integer.MAX_VALUE))); curConf.put("delimiter", argConf.get("regex", "ID=.*")); curConf.put("taxon", argConf.get("taxon", "all")); curConf.put("intermediate", argConf.get("full_run", "false")); curConf.put("quick_add", argConf.get("quick_add", "false")); Boolean full_run = curConf.get("intermediate").matches("(?i).*true.*"); curConf.put("format", argConf.get("format", "unknown")); curConf.put("split", argConf.get("split", "1")); curConf.put("copy", argConf.get("copy", "true")); // Constants curConf.put("base_path", argConf.get("hdfs_base_path")); curConf.put("temp_path", argConf.get("hdfs_temp_path")); curConf.put("local_temp_path", argConf.get("local_temp_path")); curConf.put("db_name_files", argConf.get("hbase_file_table")); curConf.put("db_name_runs", argConf.get("hbase_run_table")); curConf.put("db_name_updates", argConf.get("hbase_db_update_table")); // Timestamps Date currentTime = new Date(); Date endDate = new Date(new Long(curConf.get("timestamp_stop"))); curConf.put("timestamp_real", Long.toString(currentTime.getTime())); return true; }