public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String cur_file =
      ((FileSplit) context.getInputSplit()).getPath().getParent().getParent().getName();
  String train_file = context.getConfiguration().get("train_file");
  if (cur_file.equals(train_file)) {
    StringTokenizer st = new StringTokenizer(value.toString());
    String word = st.nextToken();
    String f_id = st.nextToken();
    myKey.set(word);
    myVal.set(f_id);
    context.write(myKey, myVal);
  } else {
    StringTokenizer st = new StringTokenizer(value.toString());
    String word = st.nextToken();
    String f_id = st.nextToken();
    StringBuilder builder = new StringBuilder(dlt);
    while (st.hasMoreTokens()) {
      String filename = st.nextToken();
      String tf_idf = st.nextToken();
      builder.append(filename);
      builder.append(dlt);
      builder.append(tf_idf);
      builder.append("\t");
    }
    myKey.set(word);
    myVal.set(builder.toString());
    context.write(myKey, myVal);
  }
}
// specify input and output keys
public void map(
    LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
    throws IOException {
  String line = value.toString(); // read the input record as a string
  ArrayList<Integer> range = new ArrayList<Integer>();
  for (int i = 2000; i <= 2010; i++) {
    range.add(i);
  }
  // String[] inputs = line.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");
  String[] inputs = line.split(",");
  try {
    int year = Integer.parseInt(inputs[165]);
    if (range.contains(year)) {
      String dur = inputs[3];
      String artist_name = inputs[2];
      String song_title = inputs[1];
      String final_input = artist_name + ',' + dur + ',' + song_title;
      Final_Value.set(final_input);
      output.collect(Final_Value, dummy);
    }
  } catch (NumberFormatException e) {
    // do nothing
  }
}
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  Text word = new Text();
  StringTokenizer s = new StringTokenizer(value.toString());
  while (s.hasMoreTokens()) {
    word.set(s.nextToken());
    context.write(word, one);
  }
}
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
    throws IOException {
  String line = value.toString();
  StringTokenizer tokenizer = new StringTokenizer(line);
  while (tokenizer.hasMoreTokens()) {
    word.set(tokenizer.nextToken());
    output.collect(word, one);
  }
}
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String line = value.toString();
  StringTokenizer tokenizer = new StringTokenizer(line);
  while (tokenizer.hasMoreTokens()) {
    word.set(tokenizer.nextToken());
    context.write(word, one);
  }
}
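// A minimal companion reducer sketch (not from the original snippets) for the word-count
// style mappers above: it assumes those mappers emit (Text word, IntWritable one) pairs,
// uses the new org.apache.hadoop.mapreduce API, and simply sums the counts per word.
public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  int sum = 0;
  for (IntWritable val : values) {
    sum += val.get();
  }
  context.write(key, new IntWritable(sum));
}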
public void reduce(Text key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  double result = 0.0;
  /* adds all the values corresponding to a key */
  for (Text value : values) {
    result += Double.parseDouble(value.toString());
  }
  context.write(null, new Text(key + "," + Double.toString(result)));
}
public void reduce(LongWritable key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  LongWritable curNodeId = key;
  double previousPRValue = 1;
  double nextPRValue = 0;
  double localResidual = 0;
  String edgeListOfCurNode = "";
  long localResidualTransformed = 0;
  for (Text value : values) {
    String[] inputInfo = value.toString().split("\\s+");
    if (inputInfo.length == 1) {
      // incoming pagerank value
      nextPRValue += Double.parseDouble(inputInfo[0]);
    } else if (inputInfo.length == 3) {
      // current node info with edge list
      edgeListOfCurNode = inputInfo[2];
      previousPRValue = Double.parseDouble(inputInfo[1]);
    } else if (inputInfo.length == 2) {
      // current node info without edge list
      previousPRValue = Double.parseDouble(inputInfo[1]);
    } else {
      System.out.println("ERROR: received TEXT of unexpected length");
    }
  }
  if (previousPRValue == 1) {
    System.out.println("No node info has been received by a reducer");
  }
  // calculate the pagerank value according to the given formula
  nextPRValue = pagerankFormula(nextPRValue);
  // should also iterate over the sink-node list and add the evenly split value
  // the reducer stores the updated node info (NPR) to the output directory
  context.write(null, new Text(curNodeId + " " + nextPRValue + " " + edgeListOfCurNode));
  // then compare PPR with NPR
  // (note: floating-point division by zero does not throw ArithmeticException,
  //  so a zero NPR value must be guarded against explicitly)
  if (nextPRValue != 0) {
    localResidual = Math.abs(previousPRValue - nextPRValue) / nextPRValue;
    localResidualTransformed = (long) (localResidual * 10000);
    // System.out.println("Make sure you got the right transformed residual: "
    //     + localResidualTransformed);
  } else {
    System.out.println("NPR is zero. Check where you get the value!");
  }
  // assume there is a global counter called residualCounter
  context.getCounter(myCounter.ResidualCounter.RESIDUAL_SUM).increment(localResidualTransformed);
}
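// A minimal sketch (not from the original snippet) of the counter enum the PageRank reducer
// above increments; the names myCounter and ResidualCounter come from that snippet, and the
// driver is assumed to read the counter after each iteration to check for convergence.
public class myCounter {
  public enum ResidualCounter {
    RESIDUAL_SUM
  }
}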
public void reduce(IntWritable key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  List<Double> newCentroid = null;
  int numPoints = 0;
  for (Text value : values) {
    ++numPoints;
    List<Double> p = GetPoint(value.toString());
    if (newCentroid == null) {
      // initialize the new centroid to the first element
      newCentroid = new ArrayList<Double>(p);
    } else {
      for (int i = 0; i < newCentroid.size(); i++) {
        newCentroid.set(i, newCentroid.get(i) + p.get(i));
      }
    }
  }
  // now newCentroid contains the sum of all the points,
  // so to get the average of all the points we need to
  // divide each entry by the total number of points
  for (int i = 0; i < newCentroid.size(); i++) {
    newCentroid.set(i, newCentroid.get(i) / (double) numPoints);
  }
  // now create a string containing all the new centroid's coordinates
  String s = null;
  for (Double d : newCentroid) {
    if (s == null) {
      s = d.toString();
    } else {
      s += " " + d.toString();
    }
  }
  newCentroids.add(s);
  if (newCentroids.size() == 10) {
    WriteNewCentroids(context);
  }
  // output the centroid ID and the centroid data
  context.write(key, new Text(s));
}
public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  int sum = 0;
  boolean filtered = true;
  String keypair = key.toString();
  String[] keys = keypair.split(" ");
  if (keys.length == 1) {
    filtered = false;
  } else {
    if (keys[0].equals("*") || keys[1].equals("*")) {
      filtered = false;
    }
  }
  if (!filtered) {
    for (IntWritable val : values) {
      sum += val.get();
    }
    context.write(key, new IntWritable(sum));
    return;
  }
  for (IntWritable val : values) {
    if (val.get() == -1) {
      filtered = false;
      continue;
    }
    sum += val.get();
  }
  // filter non-needed events
  if (filtered) {
    return;
  }
  context.write(key, new IntWritable(sum));
}
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String s = value.toString();
  String input[] = s.split(",");
  Text outputkey = new Text();
  Text outputvalue = new Text();
  double result = 0.0;
  /* multiplies the matrix entry by the vector entry with the matching column index */
  result = Double.parseDouble(input[2]) * vector.get(Long.parseLong(input[1]));
  outputkey.set(input[0]);
  outputvalue.set(Double.toString(result));
  context.write(outputkey, outputvalue);
}
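// A hedged sketch (not from the original snippet) of how the vector field used by the
// matrix-vector mapper above might be populated. It assumes a field "vector" of type
// Map<Long, Double>, a side file of "index,value" lines whose path is passed in the job
// configuration under the hypothetical key "vector.path", and org.apache.hadoop.fs
// FileSystem/Path plus java.io.BufferedReader for reading the file.
private Map<Long, Double> vector = new HashMap<Long, Double>();

@Override
protected void setup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Path vectorPath = new Path(conf.get("vector.path"));
  FileSystem fs = FileSystem.get(conf);
  BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(vectorPath)));
  try {
    String line;
    while ((line = reader.readLine()) != null) {
      String[] parts = line.split(",");
      vector.put(Long.parseLong(parts[0]), Double.parseDouble(parts[1]));
    }
  } finally {
    reader.close();
  }
}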
public void map(
    LongWritable key, Text value, OutputCollector<IntWritable, DoubleWritable> output, Reporter reporter)
    throws IOException {
  /*
   * Implements the mapper. It outputs the weight numbers and the updated weights.
   *
   * Note that the format of the intermediate output is <IntWritable, DoubleWritable>,
   * because the key is the number of a weight (an integer) and the value is the
   * weight's value (a double).
   */
  inputData = value.toString();
  // go through the process
  initialize();
  getposphase();
  getnegphase();
  update();
  // output the intermediate data
  // The <key, value> pairs are <weightID, weightUpdate>
  double[][] vishidinc_array = vishidinc.getArray();
  for (int i = 0; i < numdims; i++) {
    for (int j = 0; j < numhid; j++) {
      weightPos.set(i * numhid + j);
      weightValue.set(vishidinc_array[i][j]);
      output.collect(weightPos, weightValue);
    }
  }
}
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  System.out.println("HELLO" + key + value);
  String line = value.toString();
  String[] components = line.split("\\s+");
  context.write(new Text(components[0]), new Text(components[1]));
}
public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  String file = value.toString();
  String[] lines = file.split("\n");
  for (String line : lines) {
    if (line.contains("<author>") && line.contains("</author>")) {
      // take the text between the opening and closing author tags
      String author =
          line.substring(line.indexOf("<author>") + "<author>".length(), line.indexOf("</author>"));
      word.set(author);
      context.write(word, one);
    } else if (line.contains("<author>")) {
      // opening tag only: take everything after it
      String author = line.substring(line.indexOf("<author>") + "<author>".length());
      word.set(author);
      context.write(word, one);
    }
  }
}
/** Implements the map-method of the Mapper-interface */
@Override
public void map(
    LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
    throws IOException {
  String line = value.toString();
  StringTokenizer tokenizer = new StringTokenizer(line);
  while (tokenizer.hasMoreTokens()) {
    String token = tokenizer.nextToken();
    if (token.length() > 3) {
      word.set(token);
      output.collect(word, one);
    }
  }
}
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String line = value.toString();
  String[] currentUserTuples = line.split("::");
  int currentAgeValue = Integer.parseInt(currentUserTuples[2].trim());
  if (currentUserTuples[1].trim().equalsIgnoreCase("M") && currentAgeValue <= targetAge) {
    context.write(new Text("UserId :: " + currentUserTuples[0].trim()), one);
  }
}
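// A hedged sketch (not from the original snippet) of how the targetAge field used by the
// mapper above might be supplied; it assumes the driver stores the threshold in the job
// configuration under the hypothetical key "target.age" and that the mapper reads it back
// in setup().
private int targetAge;

@Override
protected void setup(Context context) throws IOException, InterruptedException {
  // default to 25 if the driver did not set the property (arbitrary example default)
  targetAge = context.getConfiguration().getInt("target.age", 25);
}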
public void reduce(
    Text key, Iterator<CrawlDatum> values, OutputCollector<Text, CrawlDatum> output, Reporter reporter)
    throws IOException {
  boolean oldSet = false;
  boolean injectedSet = false;
  while (values.hasNext()) {
    CrawlDatum val = values.next();
    if (val.getStatus() == CrawlDatum.STATUS_INJECTED) {
      injected.set(val);
      injected.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
      injectedSet = true;
    } else {
      old.set(val);
      oldSet = true;
    }
  }
  CrawlDatum res = null;

  /**
   * Whether to overwrite, ignore or update existing records
   *
   * @see https://issues.apache.org/jira/browse/NUTCH-1405
   */
  // Injected record already exists and overwrite but not update
  if (injectedSet && oldSet && overwrite) {
    res = injected;
    if (update) {
      LOG.info(key.toString() + " overwritten with injected record but update was specified.");
    }
  }

  // Injected record already exists and update but not overwrite
  if (injectedSet && oldSet && update && !overwrite) {
    res = old;
    old.putAllMetaData(injected);
    old.setScore(injected.getScore() != scoreInjected ? injected.getScore() : old.getScore());
    old.setFetchInterval(
        injected.getFetchInterval() != interval
            ? injected.getFetchInterval()
            : old.getFetchInterval());
  }

  // Old default behaviour
  if (injectedSet && !oldSet) {
    res = injected;
  } else {
    res = old;
  }
  output.collect(key, res);
}
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  // read in a document (point in 58-dimensional space)
  List<Double> p = GetPoint(value.toString());
  int idxClosestCentroid = IndexOfClosestCentroid(p);
  context.write(new IntWritable(idxClosestCentroid), value);
}
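// A hedged sketch (not from the original snippets) of the two helpers the k-means mapper
// above relies on. It assumes a field "centroids" of type List<List<Double>> loaded before
// the map phase, points encoded as whitespace-separated doubles, and squared Euclidean
// distance for picking the closest centroid.
private List<Double> GetPoint(String line) {
  List<Double> point = new ArrayList<Double>();
  for (String token : line.trim().split("\\s+")) {
    point.add(Double.parseDouble(token));
  }
  return point;
}

private int IndexOfClosestCentroid(List<Double> p) {
  int bestIndex = 0;
  double bestDistance = Double.MAX_VALUE;
  for (int c = 0; c < centroids.size(); c++) {
    double distance = 0.0;
    for (int i = 0; i < p.size(); i++) {
      double diff = p.get(i) - centroids.get(c).get(i);
      distance += diff * diff; // squared distance is enough for comparison
    }
    if (distance < bestDistance) {
      bestDistance = distance;
      bestIndex = c;
    }
  }
  return bestIndex;
}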
/*
 * Finds a full file and sets it as the value.
 */
public synchronized boolean next(LongWritable key, Text value) throws IOException {
  Text line = new Text();
  boolean retrieved = true;
  String result = "";
  value.clear();
  while (retrieved) {
    retrieved = recordReader.next(key, line);
    if (line.toString().length() > 0) {
      String lineValue = line.toString();
      result += lineValue + "\n";
    }
  }
  // signal end of input once the underlying reader is exhausted and nothing was read,
  // otherwise the framework would keep asking for (empty) records forever
  if (result.length() == 0) {
    return false;
  }
  value.set(result);
  return true;
}
public void map(
    LongWritable key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
    throws IOException {
  String arr[] = value.toString().split("\\r?\\n");
  for (String row : arr) {
    // skip rows that start with a quote character
    if (row.startsWith("\"")) {
      continue;
    }
    String parts[] = row.split(",");
    output.collect(new IntWritable(Integer.parseInt(parts[1])), new Text(parts[4]));
  }
}
public void reduce(Text key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  String[] pair = new String[2];
  int count = 0;
  for (Text txt : values) {
    pair[count] = txt.toString();
    count++;
  }
  // word exists in training
  if (count == 2) {
    StringTokenizer st_one, st_two;
    if (pair[0].contains(dlt)) {
      st_one = new StringTokenizer(pair[1]);
      st_two = new StringTokenizer(pair[0]);
    } else {
      st_one = new StringTokenizer(pair[0]);
      st_two = new StringTokenizer(pair[1]);
    }
    // outputting the data
    String f_id = st_one.nextToken();
    StringBuilder builder = new StringBuilder(dlt);
    builder.append(f_id);
    builder.append(dlt);
    while (st_two.hasMoreTokens()) {
      String filename = st_two.nextToken();
      String tf_idf = st_two.nextToken();
      builder.append(filename);
      builder.append(dlt);
      builder.append(tf_idf);
      builder.append("\t");
    }
    myVal.set(builder.toString());
    context.write(key, myVal);
  }
}
public void map(
    LongWritable key, Text value, OutputCollector<DoubleWritable, DoubleWritable> output, Reporter reporter)
    throws IOException {
  String line = value.toString();
  DoubleWritable clave = new DoubleWritable();
  DoubleWritable valor = new DoubleWritable();
  clave.set(Double.parseDouble(line));
  valor.set(Math.sqrt(Double.parseDouble(line)));
  output.collect(clave, valor);
}
public void map(
    LongWritable key, Text value, OutputCollector<IntWritable, IntWritable> output, Reporter reporter)
    throws IOException {
  String line = value.toString();
  IntWritable clave = new IntWritable();
  IntWritable valor = new IntWritable();
  clave.set(Integer.parseInt(line));
  valor.set(Integer.parseInt(line) + 1);
  output.collect(clave, valor);
}
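// A hedged sketch (not from the original snippets) of an old-API (org.apache.hadoop.mapred)
// driver that could run a mapper like the two above; the class names IncrementJob and
// IncrementMapper and the use of args[0]/args[1] as input/output paths are illustrative
// placeholders, not taken from the original code.
public class IncrementJob {
  public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(IncrementJob.class);
    conf.setJobName("increment");

    conf.setMapperClass(IncrementMapper.class); // e.g. the IntWritable mapper above
    conf.setNumReduceTasks(0);                  // map-only job: mapper output is final
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
  }
}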
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String line = value.toString();
  splitposition = line.indexOf("\t");
  labels = line.substring(0, splitposition);
  content = line.substring(splitposition + 1, line.length());
  if (content.length() <= 5) {
    word.set(labels);
    int counter = Integer.parseInt(content);
    context.write(word, new IntWritable(counter));
    return;
  }
  labeltokens = labels.split(",");
  contenttokens = tokenizeDoc(content);
  for (String label : labelspace) {
    for (String token : contenttokens) {
      // filter token, denotes the needed events
      word.set(label + " " + token);
      context.write(word, new IntWritable(-1));
    }
  }
}
/*
 * Finds a full sentence and sets it as the value.
 * If the sentence is shorter than the full line, the rest is stored to use later.
 */
public synchronized boolean next(LongWritable key, Text value) throws IOException {
  Text line = new Text();
  boolean getMore = true;
  boolean retrieved = false;
  String result = leftovers;
  leftovers = "";
  value.clear();
  while (getMore) {
    retrieved = recordReader.next(key, line);
    if (retrieved) {
      String lineValue = line.toString();
      // here, we assume sentences run until the period
      int endOfSentence = lineValue.indexOf('.');
      if (endOfSentence == -1) {
        result += " " + lineValue;
      } else {
        result += " " + lineValue.substring(0, endOfSentence + 1);
        leftovers = lineValue.substring(endOfSentence + 1);
        getMore = false;
      }
    } else {
      getMore = false;
      value.set(result);
      return false;
    }
  }
  value.set(result);
  return true;
}
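// A hedged sketch (not from the original snippets) of the enclosing record-reader class the
// next() methods above appear to belong to; the class and field names (SentenceRecordReader,
// recordReader, leftovers) are assumptions, with the underlying reader taken to be the
// old-API org.apache.hadoop.mapred.LineRecordReader over the file split.
public class SentenceRecordReader implements RecordReader<LongWritable, Text> {
  private final LineRecordReader recordReader;
  private String leftovers = "";

  public SentenceRecordReader(Configuration job, FileSplit split) throws IOException {
    recordReader = new LineRecordReader(job, split);
  }

  public LongWritable createKey() {
    return recordReader.createKey();
  }

  public Text createValue() {
    return new Text();
  }

  public long getPos() throws IOException {
    return recordReader.getPos();
  }

  public float getProgress() throws IOException {
    return recordReader.getProgress();
  }

  public void close() throws IOException {
    recordReader.close();
  }

  // next(LongWritable key, Text value) is the method shown above and completes the interface
}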
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String line = value.toString();
  String[] currentMovieTuples = line.split("::");
  String[] inMovieList = inMovies.split(",");
  for (String s : inMovieList) {
    if (currentMovieTuples[1].trim().equalsIgnoreCase(s)) {
      context.write(
          new Text(
              "Movie :: "
                  + currentMovieTuples[1].trim()
                  + " Genre :: "
                  + currentMovieTuples[2].trim()),
          one);
    }
  }
}
public void map(
    LongWritable key, Text value, OutputCollector<IntWritable, DoubleWritable> output, Reporter reporter)
    throws IOException {
  String line = value.toString();
  StringTokenizer tokenizer = new StringTokenizer(line);
  int rowIdx = 0;
  double xValue = 0;
  if (tokenizer.hasMoreTokens()) {
    rowIdx = Integer.parseInt(tokenizer.nextToken());
    xValue = Double.parseDouble(tokenizer.nextToken());
  }
  double xResult = (resVec[rowIdx] - (sumVec[rowIdx] * xValue)) / diaVec[rowIdx];
  output.collect(new IntWritable(rowIdx), new DoubleWritable(xResult));
}
public void map(
    WritableComparable<?> key, Text value, OutputCollector<Text, CrawlDatum> output, Reporter reporter)
    throws IOException {
  String url = value.toString(); // value is a line of text

  if (url != null && url.trim().startsWith("#")) {
    /* Ignore lines that start with # */
    return;
  }

  // if tabs: metadata that could be stored
  // must be name=value and separated by \t
  float customScore = -1f;
  int customInterval = interval;
  int fixedInterval = -1;
  Map<String, String> metadata = new TreeMap<String, String>();
  if (url.indexOf("\t") != -1) {
    String[] splits = url.split("\t");
    url = splits[0];
    for (int s = 1; s < splits.length; s++) {
      // find the separation between name and value
      int indexEquals = splits[s].indexOf("=");
      if (indexEquals == -1) {
        // skip anything without a =
        continue;
      }
      String metaname = splits[s].substring(0, indexEquals);
      String metavalue = splits[s].substring(indexEquals + 1);
      if (metaname.equals(nutchScoreMDName)) {
        try {
          customScore = Float.parseFloat(metavalue);
        } catch (NumberFormatException nfe) {
        }
      } else if (metaname.equals(nutchFetchIntervalMDName)) {
        try {
          customInterval = Integer.parseInt(metavalue);
        } catch (NumberFormatException nfe) {
        }
      } else if (metaname.equals(nutchFixedFetchIntervalMDName)) {
        try {
          fixedInterval = Integer.parseInt(metavalue);
        } catch (NumberFormatException nfe) {
        }
      } else {
        metadata.put(metaname, metavalue);
      }
    }
  }
  try {
    url = urlNormalizers.normalize(url, URLNormalizers.SCOPE_INJECT);
    url = filters.filter(url); // filter the url
  } catch (Exception e) {
    if (LOG.isWarnEnabled()) {
      LOG.warn("Skipping " + url + ":" + e);
    }
    url = null;
  }
  if (url == null) {
    reporter.getCounter("injector", "urls_filtered").increment(1);
  } else {
    // if it passes
    value.set(url); // collect it
    CrawlDatum datum = new CrawlDatum();
    datum.setStatus(CrawlDatum.STATUS_INJECTED);

    // Is the interval custom? Then set it as metadata
    if (fixedInterval > -1) {
      // Set the writable using a float. Float is used by AdaptiveFetchSchedule
      datum
          .getMetaData()
          .put(Nutch.WRITABLE_FIXED_INTERVAL_KEY, new FloatWritable(fixedInterval));
      datum.setFetchInterval(fixedInterval);
    } else {
      datum.setFetchInterval(customInterval);
    }
    datum.setFetchTime(curTime);

    // now add the metadata
    Iterator<String> keysIter = metadata.keySet().iterator();
    while (keysIter.hasNext()) {
      String keymd = keysIter.next();
      String valuemd = metadata.get(keymd);
      datum.getMetaData().put(new Text(keymd), new Text(valuemd));
    }
    if (customScore != -1) {
      datum.setScore(customScore);
    } else {
      datum.setScore(scoreInjected);
    }
    try {
      scfilters.injectedScore(value, datum);
    } catch (ScoringFilterException e) {
      if (LOG.isWarnEnabled()) {
        LOG.warn(
            "Cannot filter injected score for url "
                + url
                + ", using default ("
                + e.getMessage()
                + ")");
      }
    }
    reporter.getCounter("injector", "urls_injected").increment(1);
    output.collect(value, datum);
  }
}