@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); Matcher m = NUMBERS.matcher(line); m.find(); VarLongWritable userID = new VarLongWritable(Long.parseLong(m.group())); VarLongWritable itemID = new VarLongWritable(); while (m.find()) { itemID.set(Long.parseLong(m.group())); context.write(userID, itemID); } }
/** Sets up configuration based on params */ private static boolean setup(Hashtable<String, String> curConf, Configuration argConf) { if (argConf.get("file") == null) { logger.fatal("Missing file parameter"); System.exit(1); } if (argConf.get("hdfs_base_path") == null) { logger.fatal("Missing HDFS base path, check gestore-conf.xml"); System.exit(1); } if (argConf.get("hdfs_temp_path") == null) { logger.fatal("Missing HDFS temp path, check gestore-conf.xml"); System.exit(1); } if (argConf.get("local_temp_path") == null) { logger.fatal("Missing local temp path, check gestore-conf.xml"); System.exit(1); } // Input paramaters curConf.put("run_id", argConf.get("run", "")); curConf.put("task_id", argConf.get("task", "")); curConf.put("file_id", argConf.get("file")); curConf.put("local_path", argConf.get("path", "")); curConf.put("type", argConf.get("type", "l2r")); curConf.put("timestamp_start", argConf.get("timestamp_start", "1")); curConf.put( "timestamp_stop", argConf.get("timestamp_stop", Integer.toString(Integer.MAX_VALUE))); curConf.put("delimiter", argConf.get("regex", "ID=.*")); curConf.put("taxon", argConf.get("taxon", "all")); curConf.put("intermediate", argConf.get("full_run", "false")); curConf.put("quick_add", argConf.get("quick_add", "false")); Boolean full_run = curConf.get("intermediate").matches("(?i).*true.*"); curConf.put("format", argConf.get("format", "unknown")); curConf.put("split", argConf.get("split", "1")); curConf.put("copy", argConf.get("copy", "true")); // Constants curConf.put("base_path", argConf.get("hdfs_base_path")); curConf.put("temp_path", argConf.get("hdfs_temp_path")); curConf.put("local_temp_path", argConf.get("local_temp_path")); curConf.put("db_name_files", argConf.get("hbase_file_table")); curConf.put("db_name_runs", argConf.get("hbase_run_table")); curConf.put("db_name_updates", argConf.get("hbase_db_update_table")); // Timestamps Date currentTime = new Date(); Date endDate = new Date(new Long(curConf.get("timestamp_stop"))); curConf.put("timestamp_real", Long.toString(currentTime.getTime())); return true; }
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { Configuration c = context.getConfiguration(); String s = value.toString(); String input[] = s.split(","); Text outputkey = new Text(); Text outputvalue = new Text(); double result = 0.0; /* multiplies matrix and vector entry with matching column value */ result = (Double.parseDouble(input[2])) * (vector.get(Long.parseLong(input[1]))); outputkey.set(input[0]); outputvalue.set(Double.toString(result)); context.write(outputkey, outputvalue); }
/* called once at the beginning of the task */ public void setup(Context context) throws IOException, InterruptedException { BufferedReader br = null; Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration()); if (files != null && files.length > 0) br = new BufferedReader(new FileReader(files[0].toString())); String line = null; /* reads the cached file into a hashmap */ try { while ((line = br.readLine()) != null) { String input[] = line.split(","); vector.put(Long.valueOf(input[0]), Double.valueOf(input[1])); } } finally { br.close(); } }
/** * The work is done here for preparing the output Mutation. The TileIdWritable and * RasterWritable are transformed here. */ @Override public void write(TileIdWritable key, RasterWritable value) throws IOException, InterruptedException { int zoom = zoomLevel; if (key instanceof TileIdZoomWritable) { zoom = ((TileIdZoomWritable) key).getZoom(); } // ColumnVisibility cv = new ColumnVisibility(); // transform the keys ByteBuffer buf = ByteBuffer.allocate(8); buf.putLong(key.get()); Mutation m = new Mutation(new Text(buf.array())); // We only want the actual bytes for the value, not the full array of bytes Value v = new Value(value.copyBytes()); m.put(Integer.toString(zoom), Long.toString(key.get()), cv, v); _inRecordWriter.write(outTable, m); } // end write
public long calculatePreviousLinesInFile(IdSetterKey currentKey, TreeMap<String, Long> tempMap) { long result = 0; for (Map.Entry<String, Long> entry : tempMap.entrySet()) { String counterName = entry.getKey(); IdSetterKey keyFromCounter = new IdSetterKey( counterName.split(IdSetterKey.DELIMITER)[0], Long.parseLong(counterName.split(IdSetterKey.DELIMITER)[1]), 0); if (currentKey.compareTo(keyFromCounter) > 0) { result += entry.getValue(); } } return result; }
/** * Parse arguments and then runs a map/reduce job. Print output in standard out. * * @return a non-zero if there is an error. Otherwise, return 0. */ public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: " + getClass().getName() + " <nMaps> <nSamples>"); ToolRunner.printGenericCommandUsage(System.err); return 2; } final int nMaps = Integer.parseInt(args[0]); final long nSamples = Long.parseLong(args[1]); long now = System.currentTimeMillis(); int rand = new Random().nextInt(Integer.MAX_VALUE); final Path tmpDir = new Path(TMP_DIR_PREFIX + "_" + now + "_" + rand); System.out.println("Number of Maps = " + nMaps); System.out.println("Samples per Map = " + nSamples); System.out.println( "Estimated value of Pi is " + estimatePi(nMaps, nSamples, tmpDir, getConf())); return 0; }
public int run(String[] args) throws Exception { // printUsage(); /* * SETUP */ Configuration argConf = getConf(); Hashtable<String, String> confArg = new Hashtable<String, String>(); setup(confArg, argConf); Date currentTime = new Date(); Date endDate = new Date(new Long(confArg.get("timestamp_stop"))); Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*"); Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*"); logger.info("Running GeStore"); // ZooKeeper setup Configuration config = HBaseConfiguration.create(); zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config)); zkInstance = new ZooKeeper( ZKConfig.getZKQuorumServersString(config), config.getInt("zookeeper.session.timeout", -1), zkWatcher); if (!confArg.get("task_id").isEmpty()) { confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id")); } String lockRequest = confArg.get("file_id"); if (!confArg.get("run_id").isEmpty()) lockRequest = lockRequest + "_" + confArg.get("run_id") + "_"; if (!confArg.get("task_id").isEmpty()) lockRequest = lockRequest + "_" + confArg.get("task_id") + "_"; // Get type of movement toFrom type_move = checkArgs(confArg); if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) { List<String> arguments = new ArrayList<String>(); arguments.add("-Dinput=" + confArg.get("local_path")); arguments.add("-Dtable=" + confArg.get("file_id")); arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop")); arguments.add("-Dtype=" + confArg.get("format")); arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id")); arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path")); arguments.add("-Drun_id=" + confArg.get("run_id")); if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id")); if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id")); if (quick_add) arguments.add("-Dquick_add=" + confArg.get("quick_add")); String lockName = lock(lockRequest); String[] argumentString = arguments.toArray(new String[arguments.size()]); adddb.main(argumentString); unlock(lockName); System.exit(0); } // Database registration dbutil db_util = new dbutil(config); db_util.register_database(confArg.get("db_name_files"), true); db_util.register_database(confArg.get("db_name_runs"), true); db_util.register_database(confArg.get("db_name_updates"), true); FileSystem hdfs = FileSystem.get(config); FileSystem localFS = FileSystem.getLocal(config); // Get source type confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id"))); confArg.put( "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id"))); if (!confArg.get("source").equals("local") && type_move == toFrom.REMOTE2LOCAL && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) { confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util))); } /* * Get previous timestamp */ Get run_id_get = new Get(confArg.get("run_id").getBytes()); Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get); KeyValue run_file_prev = run_get.getColumnLatest( "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes()); String last_timestamp = new String("0"); if (null != run_file_prev && !confArg.get("source").equals("local")) { long last_timestamp_real = run_file_prev.getTimestamp(); Long current_timestamp = new Long(confArg.get("timestamp_real")); if ((current_timestamp - last_timestamp_real) > 36000) { last_timestamp = new String(run_file_prev.getValue()); Integer lastTimestamp = new Integer(last_timestamp); lastTimestamp += 1; last_timestamp = lastTimestamp.toString(); logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate); Date last_run = new Date(run_file_prev.getTimestamp()); if (last_run.before(endDate) && !full_run) { confArg.put("timestamp_start", last_timestamp); } } } Integer tse = new Integer(confArg.get("timestamp_stop")); Integer tss = new Integer(confArg.get("timestamp_start")); if (tss > tse) { logger.info("No new version of requested file."); return 0; } /* * Generate file */ String lockName = lock(lockRequest); Get file_id_get = new Get(confArg.get("file_id").getBytes()); Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get); if (!file_get.isEmpty()) { boolean found = hasFile( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg)); if (confArg.get("source").equals("fullfile")) { found = false; } String filenames_put = getFileNames( db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg)); // Filename not found in file database if (!found && type_move == toFrom.REMOTE2LOCAL) { if (!confArg.get("source").equals("local")) { // Generate intermediate file if (getFile(hdfs, confArg, db_util) == null) { unlock(lockName); return 1; } // Put generated file into file database if (!confArg.get("format").equals("fullfile")) { putFileEntry( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), confArg.get("full_file_name"), confArg.get("source")); } } else { logger.warn("Remote file not found, and cannot be generated! File: " + confArg); unlock(lockName); return 1; } } } else { if (type_move == toFrom.REMOTE2LOCAL) { logger.warn("Remote file not found, and cannot be generated."); unlock(lockName); return 1; } } /* * Copy file * Update tables */ if (type_move == toFrom.LOCAL2REMOTE) { if (!confArg.get("format").equals("fullfile")) { putFileEntry( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg), confArg.get("source")); } putRunEntry( db_util, confArg.get("db_name_runs"), confArg.get("run_id"), confArg.get("file_id"), confArg.get("type"), confArg.get("timestamp_real"), confArg.get("timestamp_stop"), getFullPath(confArg), confArg.get("delimiter")); hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg))); } else if (type_move == toFrom.REMOTE2LOCAL) { FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*")); putRunEntry( db_util, confArg.get("db_name_runs"), confArg.get("run_id"), confArg.get("file_id"), confArg.get("type"), confArg.get("timestamp_real"), confArg.get("timestamp_stop"), getFullPath(confArg), confArg.get("delimiter")); unlock(lockName); for (FileStatus file : files) { Path cur_file = file.getPath(); Path cur_local_path = new Path(new String(confArg.get("local_path") + confArg.get("file_id"))); String suffix = getSuffix(getFileName(confArg), cur_file.getName()); if (suffix.length() > 0) { cur_local_path = cur_local_path.suffix(new String("." + suffix)); } if (confArg.get("copy").equals("true")) { String crc = hdfs.getFileChecksum(cur_file).toString(); if (checksumLocalTest(cur_local_path, crc)) { continue; } else { hdfs.copyToLocalFile(cur_file, cur_local_path); writeChecksum(cur_local_path, crc); } } else { System.out.println(cur_local_path + "\t" + cur_file); } } } unlock(lockName); return 0; }
// Information needed to get a single file: // BASE_PATH, FILE_ID, TIMESTAMP_START, TIMESTAMP_STOP, SOURCE, FILESYSTEM private static Vector<Path> getFile( FileSystem fs, Hashtable<String, String> config, dbutil db_util) throws Exception { Long latestVersion = latestVersion(config, db_util); try { config.put("timestamp_start", config.get("timestamp_start")); config.put("timestamp_real", latestVersion.toString()); config.put("timestamp_stop", latestVersion.toString()); } catch (Exception E) { logger.error("Tryign to get file that is impossible to generate: " + getFullPath(config)); return null; } if (Integer.parseInt(config.get("timestamp_start")) > Integer.parseInt(config.get("timestamp_stop"))) { return null; } logger.debug( "Getting DB for timestamp " + config.get("timestamp_start") + " to " + config.get("timestamp_stop")); String final_result = getFullPath(config); String temp_path_base = config.get("local_temp_path") + "_" + config.get("task_id") + "_" + config.get("run_id") + "/"; Path newPath = new Path(final_result + "*"); Vector<Path> ret_path = new Vector<Path>(); String lockName = lock(final_result.replaceAll("/", "_")); if (fs.globStatus(newPath).length != 0) { ret_path.add(newPath); unlock(lockName); config.put("full_file_name", final_result); return ret_path; } else { if (!config.get("source").equals("local")) { config.put("temp_path_base", temp_path_base); config.put("timestamp_start", config.get("timestamp_start")); config.put("timestamp_real", latestVersion.toString()); config.put("timestamp_stop", latestVersion.toString()); Class<?> sourceClass = Class.forName("org.gestore.plugin.source." + config.get("source") + "Source"); Method process_data = sourceClass.getMethod("process", Hashtable.class, FileSystem.class); Object processor = sourceClass.newInstance(); Object retVal; try { retVal = process_data.invoke(processor, config, fs); } catch (InvocationTargetException E) { Throwable exception = E.getTargetException(); logger.error("Unable to call method in child class: " + exception.toString()); exception.printStackTrace(System.out); unlock(lockName); return null; } FileStatus[] files = (FileStatus[]) retVal; if (files == null) { logger.error("Error getting files, no files returned"); return null; } for (FileStatus file : files) { Path cur_file = file.getPath(); Path cur_local_path = new Path(temp_path_base + config.get("file_id")); String suffix = getSuffix(config.get("file_id"), cur_file.getName()); cur_local_path = cur_local_path.suffix(suffix); Path res_path = new Path(new String(final_result + suffix)); logger.debug("Moving file" + cur_file.toString() + " to " + res_path.toString()); if (config.get("copy").equals("true")) { fs.moveFromLocalFile(cur_file, res_path); } else { fs.rename(cur_file, res_path); } } config.put("full_file_name", final_result); } } unlock(lockName); return ret_path; }
// The input video files are split into chunks of 64MB here... public void map(Object key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); System.out.println("job1:mapInp:-" + line); String[] info = line.split(" "); info[0] = info[0].trim(); info[1] = info[1].trim(); String lstfnames = "", fname = ""; try { Configuration config = new Configuration(); FileSystem hdfs = FileSystem.get(config); String prefixPath = "", fnm = ""; Pattern x = Pattern.compile("(.*)/(.*)"); Matcher xm = x.matcher(info[0]); while (xm.find()) { prefixPath = xm.group(1); fnm = xm.group(2); } String dst = "/home/" + fnm; // dst is path of the file on local system. hdfs.copyToLocalFile(new Path(info[0]), new Path(dst)); Process p = Runtime.getRuntime().exec("ffmpeg -i " + dst); String s; BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream())); Pattern D = Pattern.compile("Duration:[ ]*([0-9]+):([0-9]+):([0-9]+)"); long time = 0; // "time" is the duration of the input video file long sps = 0; // "sps" is the number of seconds(duration) of each video split while ((s = stdError.readLine()) != null) { Matcher md = D.matcher(s); while (md.find()) { time = Long.parseLong(md.group(1)) * 3600 + Long.parseLong(md.group(2)) * 60 + Long.parseLong(md.group(3)); } } Process p1 = Runtime.getRuntime().exec("du -s " + dst); BufferedReader stdInput1 = new BufferedReader(new InputStreamReader(p1.getInputStream())); String s1 = "", size = ""; // "size" is the size of input video file while ((s1 = stdInput1.readLine()) != null) { String s11[] = s1.split("\t"); size = s11[0]; } sps = (64 * 1024) * time / (Long.parseLong(size)); // chunk size is 64MB String hr, min, sc; hr = Long.toString((sps / 3600)); min = Long.toString((sps % 3600) / 60); sc = Long.toString(sps % 60); if (hr.length() < 2) hr = "0" + hr; if (min.length() < 2) min = "0" + min; if (sc.length() < 2) sc = "0" + sc; String splt = hr + ":" + min + ":" + sc; String query = "mencoder -oac copy -ovc copy -ss "; // building query to split the input video file String app = "", inpExt = ""; Pattern xx = Pattern.compile("(.*)\\.(.*)"); Matcher xxm = xx.matcher(dst); while (xxm.find()) { fname = xxm.group(1); inpExt = xxm.group(2); } String[] tmpArr = fname.split("/"); String hdfsFname = ""; long stSrt = 0; int cnt = 0; while (true) { if (stSrt > time) break; if (stSrt + sps > time) { long t = time - stSrt; hr = Long.toString((t / 3600)); min = Long.toString((t % 3600) / 60); sc = Long.toString(t % 60); if (hr.length() < 2) hr = "0" + hr; if (min.length() < 2) min = "0" + min; if (sc.length() < 2) sc = "0" + sc; splt = hr + ":" + min + ":" + sc; } cnt++; hr = Long.toString((stSrt / 3600)); min = Long.toString((stSrt % 3600) / 60); sc = Long.toString(stSrt % 60); if (hr.length() < 2) hr = "0" + hr; if (min.length() < 2) min = "0" + min; if (sc.length() < 2) sc = "0" + sc; app = hr + ":" + min + ":" + sc + " -endPos " + splt + " " + dst + " -o " + fname + "_" + Integer.toString(cnt) + "." + inpExt; Process p2 = Runtime.getRuntime().exec(query + app); String ls_str = ""; DataInputStream ls_in = new DataInputStream(p2.getInputStream()); while ((ls_str = ls_in.readLine()) != null) {} p2.destroy(); String[] tmpArr1 = fnm.split("\\."); hdfs.copyFromLocalFile( true, true, new Path(fname + "_" + Integer.toString(cnt) + "." + inpExt), new Path(prefixPath + "/" + tmpArr1[0] + "_" + Integer.toString(cnt) + "." + inpExt)); lstfnames += prefixPath + "/" + tmpArr1[0] + "_" + Integer.toString(cnt) + "." + inpExt + " #!# "; stSrt += sps; } Runtime rt1 = Runtime.getRuntime(); String[] cmd1 = {"/bin/bash", "-c", "rm " + dst}; // delete the file after use Process pr1 = rt1.exec(cmd1); pr1.waitFor(); lstfnames += "*" + info[1]; context.write( new Text(fname), new Text( lstfnames)); // "fname" contains name of the input video file with // extension(eg.".avi") removed #### "lstfnames" is a string, contains // all the names of video splits(concatenated) System.out.println("lstfnames : " + lstfnames); } catch (IOException e) { System.out.println("exception happened - here's what I know: "); e.printStackTrace(); System.exit(-1); } }