/**
 * Emits {@code (token, one)} for every token of the input line that is exactly seven characters
 * long. Tokens are produced by a default {@link StringTokenizer}.
 *
 * @param key input key supplied by the framework; not read
 * @param value one record of input text
 * @param context sink for the emitted pairs
 * @throws IOException if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  StringTokenizer tokens = new StringTokenizer(value.toString());
  while (tokens.hasMoreTokens()) {
    // The shared field is updated for every token, matching or not.
    word.set(tokens.nextToken());
    if (word.toString().length() != 7) {
      continue;
    }
    context.write(word, one);
  }
}
/**
 * Combines the {@code name=...} and {@code year=...} values grouped under one key and, when both
 * were seen, emits {@code (year, name)}.
 *
 * <p>Each value is split on {@code '='}; the part before the first {@code '='} selects the
 * attribute and the part after it is the attribute value. Values that carry no value part (no
 * {@code '='}, or nothing after it) are skipped instead of failing the task with an
 * {@link ArrayIndexOutOfBoundsException}. If an attribute occurs several times the last one wins.
 *
 * @param key grouping key; not read
 * @param vals attribute records for the key
 * @param ctx sink for the emitted pair
 * @throws Error if the write fails; the original failure is attached as the cause
 */
@Override
protected void reduce(Text key, Iterable<Text> vals, Context ctx) {
  String name = null;
  String year = null;
  for (Text val : vals) {
    String[] parts = val.toString().split("=");
    if (parts.length < 2) {
      // Malformed record: there is no parts[1] to read.
      continue;
    }
    if (parts[0].equals("name")) {
      name = parts[1];
    } else if (parts[0].equals("year")) {
      year = parts[1];
    }
  }
  if (name == null || year == null) {
    return;
  }
  try {
    ctx.write(new Text(year), new Text(name));
  } catch (Exception e) {
    if (e instanceof InterruptedException) {
      // Keep the interrupt visible to the framework thread.
      Thread.currentThread().interrupt();
    }
    e.printStackTrace(System.err);
    // Same error type and message as before, but the cause is no longer discarded.
    throw new Error("I give up", e);
  }
}
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; boolean filtered = true; String keypair = key.toString(); String[] keys = keypair.split(" "); if (keys.length == 1) { filtered = false; } else { if (keys[0].equals("*") || keys[1].equals("*")) { filtered = false; } } if (!filtered) { for (IntWritable val : values) { sum += val.get(); } context.write(key, new IntWritable(sum)); return; } for (IntWritable val : values) { if (val.get() == -1) { filtered = false; continue; } sum += val.get(); } // filter non-needed events if (filtered) return; context.write(key, new IntWritable(sum)); }
/**
 * Emits {@code (firstField, one)} where {@code firstField} is the text before the first comma of
 * the input line.
 *
 * @param key input key supplied by the framework; not read
 * @param value one comma-separated record
 * @param context sink for the emitted pair
 * @throws IOException if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String firstField = value.toString().split(",")[0];
  word.set(firstField);
  context.write(word, one);
}
/**
 * Splits the input line on runs of whitespace and emits {@code (column0, column1)}.
 *
 * <p>Lines with fewer than two columns (for example a blank line) are skipped; previously they
 * failed the task with an {@link ArrayIndexOutOfBoundsException}.
 *
 * @param key input key supplied by the framework; only printed
 * @param value one whitespace-separated record
 * @param context sink for the emitted pair
 * @throws IOException if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  System.out.println("HELLO" + key + value);
  String[] components = value.toString().split("\\s+");
  if (components.length < 2) {
    // Nothing to pair the first column with.
    return;
  }
  context.write(new Text(components[0]), new Text(components[1]));
}
// Identity public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { System.out.println("I'm in Job1 reduce"); for (Text val : values) { System.out.println("job1:redInp:-" + val.toString()); context.write(new Text(""), val); } }
/**
 * Emits {@code (token, one)} for every token of the input line, using a default
 * {@link StringTokenizer}.
 *
 * @param key input key supplied by the framework; not read
 * @param value one record of input text
 * @param context sink for the emitted pairs
 * @throws IOException if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  Text token = new Text();
  for (StringTokenizer tokens = new StringTokenizer(value.toString());
      tokens.hasMoreTokens(); ) {
    token.set(tokens.nextToken());
    context.write(token, one);
  }
}
/**
 * Emits {@code ("UserId :: " + id, one)} for records whose second field is {@code "M"}
 * (case-insensitive) and whose third field, parsed as an integer, is at most {@code targetAge}.
 *
 * <p>Fields are separated by {@code "::"} and trimmed before use.
 *
 * @param key input key supplied by the framework; not read
 * @param value one {@code "::"}-separated user record
 * @param context sink for the emitted pair
 * @throws IOException if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String[] fields = value.toString().split("::");
  int age = Integer.parseInt(fields[2].trim());
  boolean male = fields[1].trim().equalsIgnoreCase("M");
  if (!male || age > targetAge) {
    return;
  }
  context.write(new Text("UserId :: " + fields[0].trim()), one);
}
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { // read in a document (point in 58-dimensional space) List<Double> p = GetPoint(value.toString()); int idxClosestCentoid = IndexOfClosestCentroid(p); context.write(new IntWritable(idxClosestCentoid), value); }
/**
 * Emits {@code ("1", 1)} once for every token that follows the leading token of the line.
 *
 * <p>The leading token is parsed as an integer but its value is not used; the parse is kept so
 * that an empty line or a non-numeric leading token still fails exactly as before.
 *
 * @param key input key supplied by the framework; not read
 * @param value one whitespace-separated record
 * @param context sink for the emitted pairs
 * @throws IOException if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
@Override
public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  StringTokenizer tokens = new StringTokenizer(value.toString());
  Integer.parseInt(tokens.nextToken());
  while (tokens.hasMoreTokens()) {
    tokens.nextToken();
    context.write(new Text("1"), new IntWritable(1));
  }
}
/**
 * Adds up all values of a key (each parsed as a double) and emits a single record
 * {@code "key,sum"} as the output value, with a {@code null} output key.
 *
 * @param key grouping key, written into the output value
 * @param values decimal numbers in text form
 * @param context sink for the emitted record
 * @throws IOException if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
public void reduce(Text key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  double total = 0.0;
  /* adds all the values corresponding to a key */
  for (Text value : values) {
    total += Double.parseDouble(value.toString());
  }
  String record = key + "," + Double.toString(total);
  context.write(null, new Text(record));
}
public void map( Text key, Text val, org.apache.hadoop.mapreduce.Mapper<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException { int i = 0, n = 0, j = 0, lj = 0, hj = 0; String tem = ""; initStopWordsMap(); // initialize the stop list String line = val.toString(); StringTokenizer itr = new StringTokenizer(line.toLowerCase(), tokenDelimiter); // set delimiter n = itr.countTokens(); cache = new String[n]; for (i = 0; i < n; i++) { cache[i] = new String(""); // initialize the cache } i = 0; while (itr.hasMoreTokens()) { cache[i] = itr.nextToken(); // padding the cache with the words of the content i++; } for (i = 0; i < n; i++) { keyWord = cache[i]; keyWord = keyWord.trim(); if (!hmStopWord.containsKey(keyWord)) { lj = i - 10; hj = i + 10; if (lj < 0) lj = 0; if (hj > n) hj = n; tem = " "; for (j = lj; j < hj; j++) tem += cache[j] + " "; location = new Text(); location.set(key.toString() + tem); context.write(new Text(keyWord), location); } } }
public void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { LongWritable curNodeId = key; double previousPRValue = 1; double nextPRValue = 0; double localResidual = 0; String edgeListOfCurNode = ""; long localResidualTransformed = 0; for (Text value : values) { String[] inputInfo = value.toString().split("\\s+"); // incoming pagerank value if (inputInfo.length == 1) { nextPRValue += Double.parseDouble(inputInfo[0]); } // current node info else if (inputInfo.length == 3) { edgeListOfCurNode = inputInfo[2]; previousPRValue = Double.parseDouble(inputInfo[1]); } else if (inputInfo.length == 2) { previousPRValue = Double.parseDouble(inputInfo[1]); } else { System.out.println("ERROR: received unexpected TEXT in length"); } } if (previousPRValue == 1) System.out.println("No node info has been received by a reducer"); // calculate the pagerank value according to the given formula nextPRValue = pagerankFormula(nextPRValue); // should also iterate sink nodes list, add the evenly splitted value // reducer should store the updated node info(NPR) to output directory context.write(null, new Text(curNodeId + " " + nextPRValue + " " + edgeListOfCurNode)); // then compare PPR with NPR try { localResidual = Math.abs(previousPRValue - nextPRValue) / nextPRValue; localResidualTransformed = (long) (localResidual * 10000); // System.out.println("Make sure you got the right transformed residual : // "+localResidualTransformed); } catch (ArithmeticException e) { System.out.println("PPR is zero. Check where you get the value!"); } // assume there is a global counter called residualCounter; context.getCounter(myCounter.ResidualCounter.RESIDUAL_SUM).increment(localResidualTransformed); }
public void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { List<Double> newCentroid = null; int numPoints = 0; for (Text value : values) { ++numPoints; List<Double> p = GetPoint(value.toString()); if (newCentroid == null) { // initialize the new centroid to the first element newCentroid = new ArrayList<Double>(p); } else { for (int i = 0; i < newCentroid.size(); i++) { newCentroid.set(i, newCentroid.get(i) + p.get(i)); } } } // now the newCentroid contains the sum of all the points // so to get the average of all the points, we need to // divide each entry by the total number of points for (int i = 0; i < newCentroid.size(); i++) { newCentroid.set(i, newCentroid.get(i) / (double) numPoints); } // now create a string containing all the new centroid's coordinates String s = null; for (Double d : newCentroid) { if (s == null) { s = d.toString(); } else { s += " " + d.toString(); } } newCentroids.add(s); if (newCentroids.size() == 10) { WriteNewCentroids(context); } // output the centroid ID and the centroid data context.write(key, new Text(s)); }
/**
 * Emits {@code ("Movie :: <title> Genre :: <genre>", one)} once for every entry of the
 * comma-separated {@code inMovies} list that equals the record's second field, ignoring case.
 *
 * <p>The record is split on {@code "::"}; the second and third fields are trimmed before use.
 *
 * @param key input key supplied by the framework; not read
 * @param value one {@code "::"}-separated movie record
 * @param context sink for the emitted pairs
 * @throws IOException if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String[] fields = value.toString().split("::");
  for (String wanted : inMovies.split(",")) {
    if (!fields[1].trim().equalsIgnoreCase(wanted)) {
      continue;
    }
    String label = "Movie :: " + fields[1].trim() + " Genre :: " + fields[2].trim();
    context.write(new Text(label), one);
  }
}
/**
 * Multiplies one matrix entry by the matching entry of {@code vector} and emits
 * {@code (field0, product)}.
 *
 * <p>The record is split on commas: field 2 is parsed as the matrix value and field 1 as a long
 * used to look up the factor in {@code vector}. NOTE(review): field 0 and field 1 look like row
 * and column indices; confirm against the input format.
 *
 * @param key input key supplied by the framework; not read
 * @param value one comma-separated matrix entry
 * @param context sink for the emitted pair
 * @throws IOException if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String[] entry = value.toString().split(",");
  /* multiplies matrix and vector entry with matching column value */
  double product = Double.parseDouble(entry[2]) * vector.get(Long.parseLong(entry[1]));
  context.write(new Text(entry[0]), new Text(Double.toString(product)));
}
/**
 * Emits {@code (author, one)} for every line of the input that contains an {@code <author>} tag.
 *
 * <p>The author is the text between the first {@code <author>} and the next {@code </author>};
 * if the line has no closing tag after the opening one, the author is the rest of the line.
 *
 * <p>The start position is taken from the position of the opening tag instead of the fixed
 * offset 8, so indented lines or lines with text before the tag no longer yield shifted text or
 * a {@link StringIndexOutOfBoundsException}. The end position is found by searching for the full
 * closing tag after the opening one rather than the first {@code "</a"} anywhere in the line.
 *
 * @param key input key supplied by the framework; not read
 * @param value input text, possibly spanning several lines
 * @param context sink for the emitted pairs
 * @throws IOException if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  final String openTag = "<author>";
  final String closeTag = "</author>";
  for (String line : value.toString().split("\n")) {
    int open = line.indexOf(openTag);
    if (open < 0) {
      continue;
    }
    int start = open + openTag.length();
    int close = line.indexOf(closeTag, start);
    String author = (close >= 0) ? line.substring(start, close) : line.substring(start);
    word.set(author);
    context.write(word, one);
  }
}
/**
 * Tags the record with the code of the nearest vector in {@code m_kVectors}.
 *
 * <p>The record is prefixed with {@code "1\t"} and parsed with {@code Kvector.fromString}. The
 * vector with the strictly smallest {@code distance} wins, so the first one wins on ties; the
 * code stays {@code -1} if no vector is closer than {@code Integer.MAX_VALUE}. The emitted pair
 * is {@code (code, prefixedRecord)}.
 *
 * @param key input key supplied by the framework; not read
 * @param value textual form of one vector, without the leading field
 * @param context sink for the emitted pair
 * @throws IOException if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
@Override
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String record = "1\t" + value.toString();
  Kvector point = Kvector.fromString(record);

  int bestCode = -1;
  int bestDistance = Integer.MAX_VALUE;
  for (Kvector candidate : m_kVectors) {
    int distance = point.distance(candidate);
    if (distance >= bestDistance) {
      continue;
    }
    bestDistance = distance;
    bestCode = candidate.code;
  }

  context.write(new KcodeWritable(bestCode), new Text(record));
}
/**
 * Builds the inverted list of one key and emits {@code (key, list)}.
 *
 * <p>Each value is tokenized with a default {@link StringTokenizer}: the first token becomes
 * {@code url} (empty if there is none) and the remaining tokens, each followed by a space,
 * become {@code abs}. A {@code ListURL} built from both is added with
 * {@code paddingValueKey}; after all values are added {@code quickSortNodeKey()} is called. The
 * fields {@code url}, {@code abs} and {@code dr} are overwritten for every value.
 *
 * @param key the term the list belongs to
 * @param values one entry per occurrence
 * @param context sink for the emitted pair
 * @throws IOException if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
public void reduce(
    Text key,
    Iterable<Text> values,
    org.apache.hadoop.mapreduce.Reducer<Text, Text, Text, InvertedListWritable>.Context context)
    throws IOException, InterruptedException {
  InvertedListWritable invertedList = new InvertedListWritable();
  for (Text entry : values) {
    StringTokenizer tokens = new StringTokenizer(entry.toString());
    url = tokens.hasMoreTokens() ? tokens.nextToken() : "";
    StringBuilder rest = new StringBuilder();
    while (tokens.hasMoreTokens()) {
      rest.append(tokens.nextToken()).append(" ");
    }
    abs = rest.toString();
    dr = new ListURL(url, abs);
    invertedList.paddingValueKey(dr);
  }
  invertedList.quickSortNodeKey();
  context.write(key, invertedList);
}
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); splitposition = line.indexOf("\t"); labels = line.substring(0, splitposition); content = line.substring(splitposition + 1, line.length()); if (content.length() <= 5) { word.set(labels); int counter = Integer.parseInt(content); context.write(word, new IntWritable(counter)); return; } labeltokens = labels.split(","); contenttokens = tokenizeDoc(content); for (String label : labelspace) { for (String token : contenttokens) { // filter token, denotes the needed events; word.set(label + " " + token); context.write(word, new IntWritable(-1)); } } }
/**
 * Serializes this object as two modified-UTF-8 strings: the string form of {@code leftBigram}
 * followed by the string form of {@code rightBigram}.
 *
 * @param out destination of the serialized form
 * @throws IOException if writing to {@code out} fails
 */
public void write(DataOutput out) throws IOException {
  String left = leftBigram.toString();
  out.writeUTF(left);
  String right = rightBigram.toString();
  out.writeUTF(right);
}
/**
 * Splits one input video into consecutive chunks with mencoder and publishes the chunks to HDFS.
 *
 * <p>The input line is split on a single space. Field 0 is used as the HDFS path of the video.
 * Field 1 is appended, prefixed with "*", to the end of the emitted value.
 * NOTE(review): field 1 looks like the target format read by a later job; confirm with the
 * caller.
 *
 * <p>Steps, in execution order:
 *
 * <ol>
 *   <li>Copy the video from HDFS to "/home/" + file name on the local disk ({@code dst}).
 *   <li>Run "ffmpeg -i" on the local copy and scan its stderr for "Duration: hh:mm:ss"; the
 *       duration in seconds is kept in {@code time}.
 *   <li>Run "du -s" on the local copy; the first tab-separated field of its last output line is
 *       kept in {@code size}.
 *   <li>Compute the length of one chunk in seconds as {@code sps = (64 * 1024) * time / size}.
 *       The original comment says the chunk size is 64MB; this presumes du reports 1 KB units.
 *       TODO confirm.
 *   <li>Starting at offset 0 and advancing by {@code sps}: run mencoder with "-ss offset -endPos
 *       length" to write fname_N.ext locally, drain the process stdout, upload the chunk with
 *       copyFromLocalFile(true, true, ...) into the directory of the source video, and append
 *       its HDFS path plus " #!# " to {@code lstfnames}. For the last chunk the length is the
 *       remaining duration.
 *   <li>Delete the local copy with "rm", append "*" + field 1 to {@code lstfnames}, and emit
 *       (local path of the video without extension, {@code lstfnames}).
 * </ol>
 *
 * <p>Review notes, all visible in the code below:
 *
 * <ul>
 *   <li>If {@code sps} evaluates to 0 the loop offset never advances and the loop does not end.
 *   <li>{@code Long.parseLong(size)} fails when du printed nothing, and a size of 0 causes an
 *       integer division by zero; neither is an IOException, so the catch block does not see it.
 *   <li>Command lines are built by string concatenation and passed to Runtime.exec(String), so
 *       a file name containing whitespace changes the command.
 *   <li>An IOException prints the stack trace and calls System.exit(-1), ending the whole JVM.
 *   <li>{@code tmpArr} and {@code hdfsFname} are assigned but never read.
 * </ul>
 *
 * @param key input key supplied by the framework; not read
 * @param value one line: HDFS video path, a space, and a second field
 * @param context sink for the emitted pair
 * @throws IOException declared, but IOExceptions raised inside the try block are handled there
 * @throws InterruptedException if waiting for the "rm" process or the write is interrupted
 */
// The input video files are split into chunks of 64MB here... public void map(Object key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); System.out.println("job1:mapInp:-" + line); String[] info = line.split(" "); info[0] = info[0].trim(); info[1] = info[1].trim(); String lstfnames = "", fname = ""; try { Configuration config = new Configuration(); FileSystem hdfs = FileSystem.get(config); String prefixPath = "", fnm = ""; Pattern x = Pattern.compile("(.*)/(.*)"); Matcher xm = x.matcher(info[0]); while (xm.find()) { prefixPath = xm.group(1); fnm = xm.group(2); } String dst = "/home/" + fnm; // dst is path of the file on local system. hdfs.copyToLocalFile(new Path(info[0]), new Path(dst)); Process p = Runtime.getRuntime().exec("ffmpeg -i " + dst); String s; BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream())); Pattern D = Pattern.compile("Duration:[ ]*([0-9]+):([0-9]+):([0-9]+)"); long time = 0; // "time" is the duration of the input video file long sps = 0; // "sps" is the number of seconds(duration) of each video split while ((s = stdError.readLine()) != null) { Matcher md = D.matcher(s); while (md.find()) { time = Long.parseLong(md.group(1)) * 3600 + Long.parseLong(md.group(2)) * 60 + Long.parseLong(md.group(3)); } } Process p1 = Runtime.getRuntime().exec("du -s " + dst); BufferedReader stdInput1 = new BufferedReader(new InputStreamReader(p1.getInputStream())); String s1 = "", size = ""; // "size" is the size of input video file while ((s1 = stdInput1.readLine()) != null) { String s11[] = s1.split("\t"); size = s11[0]; } sps = (64 * 1024) * time / (Long.parseLong(size)); // chunk size is 64MB String hr, min, sc; hr = Long.toString((sps / 3600)); min = Long.toString((sps % 3600) / 60); sc = Long.toString(sps % 60); if (hr.length() < 2) hr = "0" + hr; if (min.length() < 2) min = "0" + min; if (sc.length() < 2) sc = "0" + sc; String splt = hr + ":" + min + ":" + sc; String 
query = "mencoder -oac copy -ovc copy -ss "; // building query to split the input video file String app = "", inpExt = ""; Pattern xx = Pattern.compile("(.*)\\.(.*)"); Matcher xxm = xx.matcher(dst); while (xxm.find()) { fname = xxm.group(1); inpExt = xxm.group(2); } String[] tmpArr = fname.split("/"); String hdfsFname = ""; long stSrt = 0; int cnt = 0; while (true) { if (stSrt > time) break; if (stSrt + sps > time) { long t = time - stSrt; hr = Long.toString((t / 3600)); min = Long.toString((t % 3600) / 60); sc = Long.toString(t % 60); if (hr.length() < 2) hr = "0" + hr; if (min.length() < 2) min = "0" + min; if (sc.length() < 2) sc = "0" + sc; splt = hr + ":" + min + ":" + sc; } cnt++; hr = Long.toString((stSrt / 3600)); min = Long.toString((stSrt % 3600) / 60); sc = Long.toString(stSrt % 60); if (hr.length() < 2) hr = "0" + hr; if (min.length() < 2) min = "0" + min; if (sc.length() < 2) sc = "0" + sc; app = hr + ":" + min + ":" + sc + " -endPos " + splt + " " + dst + " -o " + fname + "_" + Integer.toString(cnt) + "." + inpExt; Process p2 = Runtime.getRuntime().exec(query + app); String ls_str = ""; DataInputStream ls_in = new DataInputStream(p2.getInputStream()); while ((ls_str = ls_in.readLine()) != null) {} p2.destroy(); String[] tmpArr1 = fnm.split("\\."); hdfs.copyFromLocalFile( true, true, new Path(fname + "_" + Integer.toString(cnt) + "." + inpExt), new Path(prefixPath + "/" + tmpArr1[0] + "_" + Integer.toString(cnt) + "." + inpExt)); lstfnames += prefixPath + "/" + tmpArr1[0] + "_" + Integer.toString(cnt) + "." 
+ inpExt + " #!# "; stSrt += sps; } Runtime rt1 = Runtime.getRuntime(); String[] cmd1 = {"/bin/bash", "-c", "rm " + dst}; // delete the file after use Process pr1 = rt1.exec(cmd1); pr1.waitFor(); lstfnames += "*" + info[1]; context.write( new Text(fname), new Text( lstfnames)); // "fname" contains name of the input video file with // extension(eg.".avi") removed #### "lstfnames" is a string, contains // all the names of video splits(concatenated) System.out.println("lstfnames : " + lstfnames); } catch (IOException e) { System.out.println("exception happened - here's what I know: "); e.printStackTrace(); System.exit(-1); } }
/**
 * Merges the converted video chunks listed in the first value back into one file and uploads
 * the result to HDFS.
 *
 * <p>Only the first value is read; the loop over {@code values} breaks after one iteration. The
 * value is trimmed and split on " #!# " into HDFS chunk paths. The first path is matched against
 * "(.*)/(.*)\.(.*)" to obtain the HDFS directory, the file name and the extension; the output
 * name is the part of the file name before the first "_" plus "." plus the extension.
 *
 * <p>Steps, in execution order:
 *
 * <ol>
 *   <li>Copy every listed chunk from HDFS to "/home/" + base + "_" + N + "." + ext with
 *       copyToLocalFile(true, ...), where N is the 1-based position in the list. The local name
 *       comes from the position, not from the listed path, so this presumes the list is in chunk
 *       order. TODO confirm.
 *   <li>Run "mencoder -oac copy -ovc copy" over all local chunks with "-o" pointing at
 *       "/home/" + output name, drain the process stdout and destroy the process.
 *   <li>Upload the merged file with copyFromLocalFile(true, true, ...) into the HDFS directory
 *       of the first chunk.
 *   <li>Remove the local chunk files with "rm" through bash and wait for it to finish.
 *   <li>Emit ("", HDFS path of the merged file).
 * </ol>
 *
 * <p>Review notes, all visible in the code below:
 *
 * <ul>
 *   <li>FileSystem.get is called outside the try block, so its IOException propagates to the
 *       caller, while IOExceptions inside the try print a stack trace and call System.exit(-1).
 *   <li>Command lines are built by string concatenation, so file names containing whitespace or
 *       shell metacharacters change the commands.
 * </ul>
 *
 * @param key grouping key; not read
 * @param values chunk lists; only the first one is used
 * @param context sink for the emitted pair
 * @throws IOException if obtaining the file system fails
 * @throws InterruptedException if waiting for the "rm" process or the write is interrupted
 */
// merge the converted files here public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { System.out.println("I'm in Job2 reduce"); Configuration config = new Configuration(); FileSystem hdfs = FileSystem.get(config); try { String out = ""; for (Text t : values) { out = t.toString(); out = out.trim(); System.out.println("job2:redInp:-" + out); break; } String[] outl = out.split(" #!# "); Pattern x = Pattern.compile("(.*)/(.*)\\.(.*)"); Matcher xm = x.matcher(outl[0]); String prefixPath = "", fnm = "", ext = ""; while (xm.find()) { prefixPath = xm.group(1); fnm = xm.group(2); ext = xm.group(3); } String foutname = fnm.split("_")[0]; foutname += "." + ext; String query = "mencoder -oac copy -ovc copy"; int cnt = 0; for (String st : outl) { cnt++; hdfs.copyToLocalFile( true, new Path(st), new Path("/home/" + fnm.split("_")[0] + "_" + Integer.toString(cnt) + "." + ext)); query += " " + "/home/" + fnm.split("_")[0] + "_" + Integer.toString(cnt) + "." + ext; } query += " -o " + "/home/" + foutname; Process p2 = Runtime.getRuntime().exec(query); // query for merging the video files is executed here String ls_str = ""; DataInputStream ls_in = new DataInputStream(p2.getInputStream()); while ((ls_str = ls_in.readLine()) != null) {} p2.destroy(); hdfs.copyFromLocalFile( true, true, new Path("/home/" + foutname), new Path(prefixPath + "/" + foutname)); cnt = 0; String dlt1 = ""; for (String st3 : outl) { cnt++; dlt1 += " " + "/home/" + fnm.split("_")[0] + "_" + Integer.toString(cnt) + "." + ext; } Runtime rt1 = Runtime.getRuntime(); String[] cmd1 = {"/bin/bash", "-c", "rm" + dlt1}; // delete the files after use Process pr1 = rt1.exec(cmd1); pr1.waitFor(); context.write(new Text(""), new Text(prefixPath + "/" + foutname)); } catch (IOException e) { System.out.println("exception happened - here's what I know: "); e.printStackTrace(); System.exit(-1); } }
/**
 * Processes one block of the graph: loads the block's records into the shared fields, calls
 * {@code IterateBlockOnce()} until its result is no larger than {@code threshold}, updates two
 * counters and emits one record per node.
 *
 * <p>The fields {@code vList}, {@code newPR}, {@code BE}, {@code BC} and {@code nodeDataMap} are
 * cleared at the start. Every value is split on a single space and dispatched on its first
 * token:
 *
 * <ul>
 *   <li>"PR": token 1 is the node id and token 2 its previous PageRank. The rank is stored in
 *       {@code newPR} and in a new {@code NodeData}; when there are exactly four tokens, token 3
 *       is stored as the edge list and the number of its comma-separated entries as the degree.
 *       The node id is added to {@code vList} and the largest numeric id is remembered.
 *   <li>"BE": token 1 is appended to the list kept in {@code BE} under token 2.
 *   <li>"BC": token 3, parsed as a double, is added to the sum kept in {@code BC} under token 2.
 * </ul>
 *
 * <p>After the iteration loop the residual is recomputed as the mean over {@code vList} of
 * {@code |node.getPageRank() - newPR.get(v)| / newPR.get(v)}. NOTE(review): this presumes
 * {@code IterateBlockOnce()} updates {@code newPR} while the {@code NodeData} objects keep the
 * previous rank; that method is not visible here.
 *
 * <p>{@code RESIDUAL_ERROR} is incremented by the residual multiplied by
 * {@code PageRankBlock.precision} and rounded down; {@code AVERAGE_ITERATIONS} is incremented by
 * the number of passes. For each node the output is key = node id and value =
 * "newPageRank degrees edgeList". A line is printed to stdout for the node with the largest id.
 * {@code cleanup(context)} is called at the end of every invocation.
 *
 * <p>Review notes: the loop has no iteration cap (the {@code i < maxIterations} condition is
 * commented out), and an empty {@code vList} makes the mean a division of 0.0 by zero.
 *
 * @param key block id; only used in the stdout message
 * @param values "PR", "BE" and "BC" records of the block
 * @param context sink for the emitted pairs and owner of the counters
 * @throws IOException if the context fails to write
 * @throws InterruptedException if the write is interrupted
 */
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { Iterator<Text> itr = values.iterator(); Text input = new Text(); String[] inputTokens = null; // initialize/reset all variables Double pageRankOld = 0.0; Double residualError = 0.0; String output = ""; Integer maxNode = 0; ArrayList<String> temp = new ArrayList<String>(); Double tempBC = 0.0; vList.clear(); newPR.clear(); BE.clear(); BC.clear(); nodeDataMap.clear(); while (itr.hasNext()) { input = itr.next(); inputTokens = input.toString().split(" "); // if first element is PR, it is the node ID, previous pagerank and outgoing edgelist for this // node if (inputTokens[0].equals("PR")) { String nodeID = inputTokens[1]; pageRankOld = Double.parseDouble(inputTokens[2]); newPR.put(nodeID, pageRankOld); NodeData node = new NodeData(); node.setNodeID(nodeID); node.setPageRank(pageRankOld); if (inputTokens.length == 4) { node.setEdgeList(inputTokens[3]); node.setDegrees(inputTokens[3].split(",").length); } vList.add(nodeID); nodeDataMap.put(nodeID, node); // keep track of the max nodeID for this block if (Integer.parseInt(nodeID) > maxNode) { maxNode = Integer.parseInt(nodeID); } // if BE, it is an in-block edge } else if (inputTokens[0].equals("BE")) { if (BE.containsKey(inputTokens[2])) { // Initialize BC for this v temp = BE.get(inputTokens[2]); } else { temp = new ArrayList<String>(); } temp.add(inputTokens[1]); BE.put(inputTokens[2], temp); // if BC, it is an incoming node from outside of the block } else if (inputTokens[0].equals("BC")) { if (BC.containsKey(inputTokens[2])) { // Initialize BC for this v tempBC = BC.get(inputTokens[2]); } else { tempBC = 0.0; } tempBC += Double.parseDouble(inputTokens[3]); BC.put(inputTokens[2], tempBC); } } int i = 0; do { i++; residualError = IterateBlockOnce(); // System.out.println("Block " + key + " pass " + i + " resError:" + residualError); } while (residualError > threshold); // i < maxIterations && // compute 
the ultimate residual error for each node in this block residualError = 0.0; for (String v : vList) { NodeData node = nodeDataMap.get(v); residualError += Math.abs(node.getPageRank() - newPR.get(v)) / newPR.get(v); } residualError = residualError / vList.size(); // System.out.println("Block " + key + " overall resError for iteration: " + residualError); // add the residual error to the counter that is tracking the overall sum (must be expressed as // a long value) long residualAsLong = (long) Math.floor(residualError * PageRankBlock.precision); long numberOfIterations = (long) (i); context.getCounter(PageRankBlock.ProjectCounters.RESIDUAL_ERROR).increment(residualAsLong); context .getCounter(PageRankBlock.ProjectCounters.AVERAGE_ITERATIONS) .increment(numberOfIterations); // output should be // key:nodeID (for this node) // value:<pageRankNew> <degrees> <comma-separated outgoing edgeList> for (String v : vList) { NodeData node = nodeDataMap.get(v); output = newPR.get(v) + " " + node.getDegrees() + " " + node.getEdgeList(); Text outputText = new Text(output); Text outputKey = new Text(v); context.write(outputKey, outputText); if (v.equals(maxNode.toString())) { System.out.println("Block:" + key + " | node:" + v + " | pageRank:" + newPR.get(v)); } } cleanup(context); }
/**
 * Converts every video chunk listed in the input value to the requested target format and
 * uploads the converted chunks to HDFS.
 *
 * <p>The trimmed value is split on " #!# *": the part before it is a " #!# "-separated list of
 * HDFS chunk paths and the part after it ({@code fmt[1]}) is the target extension.
 *
 * <p>For each chunk, in execution order:
 *
 * <ol>
 *   <li>Split the path into directory and file name with "(.*)/(.*)", then split the file name
 *       on "." into base name and input extension.
 *   <li>Copy the chunk from HDFS to "/home/" + base + "." + input extension with
 *       copyToLocalFile(true, ...).
 *   <li>Append directory + "/" + base + "." + target extension to {@code out}, separating
 *       entries with " #!# ".
 *   <li>If the target is "mpg", "mpeg" or "mp4", run mencoder with "-of mpeg -ovc lavc -lavcopts
 *       vcodec=mpeg1video -oac copy"; if it is "avi", run mencoder with "-ovc lavc -oac
 *       mp3lame"; otherwise print "Unsupported target format!!!!!". In the two supported cases
 *       the process stdout is drained, the process destroyed and the local input file queued
 *       for deletion.
 *   <li>Upload "/home/" + base + "." + target extension with copyFromLocalFile(true, true, ...)
 *       into the chunk's HDFS directory. This call is made for unsupported targets as well.
 * </ol>
 *
 * <p>Afterwards the queued local files are removed with "rm" through bash, and
 * (first chunk path, {@code out}) is emitted.
 *
 * <p>Review notes, all visible in the code below:
 *
 * <ul>
 *   <li>{@code fmt[1]} and {@code tmpArr[1]} are read without a length check, so a value without
 *       the " #!# *" separator or a file name without "." raises
 *       ArrayIndexOutOfBoundsException, which the catch block does not handle.
 *   <li>Command lines are built by string concatenation, so file names containing whitespace or
 *       shell metacharacters change the commands.
 *   <li>An IOException prints the stack trace and calls System.exit(-1), ending the whole JVM.
 * </ul>
 *
 * @param key input key supplied by the framework; not read
 * @param value chunk list followed by the target extension
 * @param context sink for the emitted pair
 * @throws IOException declared, but IOExceptions raised inside the try block are handled there
 * @throws InterruptedException if waiting for the "rm" process or the write is interrupted
 */
// Video splts are converted to target format here... public void map(Object key, Text value, Context context) throws IOException, InterruptedException { try { Configuration config = new Configuration(); FileSystem hdfs = FileSystem.get(config); String st = value.toString(); st = st.trim(); System.out.println("job2:mapInp:-" + st); String[] fmt = st.split(" #!# \\*"); String[] lst = fmt[0].split(" #!# "); String out = "", dlt = ""; int flag = 1; for (String st1 : lst) { Pattern x = Pattern.compile("(.*)/(.*)"); Matcher xm = x.matcher(st1); String prefixPath = "", fnm = "", inpExt = ""; while (xm.find()) { prefixPath = xm.group(1); fnm = xm.group(2); } String[] tmpArr = fnm.split("\\."); fnm = tmpArr[0]; inpExt = tmpArr[1]; hdfs.copyToLocalFile(true, new Path(st1), new Path("/home/" + fnm + "." + inpExt)); String fname = "/home/" + fnm; if (flag == 1) { flag = 0; out += prefixPath + "/" + fnm + "." + fmt[1]; } else { out += " #!# " + prefixPath + "/" + fnm + "." + fmt[1]; } if (fmt[1].equals("mpg") || fmt[1].equals("mpeg") || fmt[1].equals("mp4")) { Process p = Runtime.getRuntime() .exec( "mencoder -of mpeg -ovc lavc -lavcopts vcodec=mpeg1video -oac copy " + "/home/" + fnm + "." + inpExt + " -o " + fname + "." + fmt[1]); String ls_str = ""; DataInputStream ls_in = new DataInputStream(p.getInputStream()); while ((ls_str = ls_in.readLine()) != null) {} p.destroy(); dlt += " /home/" + fnm + "." + inpExt; } else if (fmt[1].equals("avi")) { Process p = Runtime.getRuntime() .exec( "mencoder -ovc lavc -oac mp3lame -o " + fname + "." + fmt[1] + " " + "/home/" + fnm + "." + inpExt); String ls_str = ""; DataInputStream ls_in = new DataInputStream(p.getInputStream()); while ((ls_str = ls_in.readLine()) != null) {} p.destroy(); dlt += " /home/" + fnm + "." + inpExt; } else { // TBD System.out.println("Unsupported target format!!!!!"); } hdfs.copyFromLocalFile( true, true, new Path(fname + "." + fmt[1]), new Path(prefixPath + "/" + fnm + "." 
+ fmt[1])); } Runtime rt1 = Runtime.getRuntime(); String[] cmd1 = {"/bin/bash", "-c", "rm" + dlt}; // delete the files after use Process pr1 = rt1.exec(cmd1); pr1.waitFor(); System.out.println("Job2 mapOut:" + out); context.write(new Text(lst[0]), new Text(out)); System.out.println(out); } catch (IOException e) { System.out.println("exception happened - here's what I know: "); e.printStackTrace(); System.exit(-1); } }