public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String cur_file = ((FileSplit) context.getInputSplit()).getPath().getParent().getParent().getName(); String train_file = context.getConfiguration().get("train_file"); if (cur_file.equals(train_file)) { StringTokenizer st = new StringTokenizer(value.toString()); String word = st.nextToken(); String f_id = st.nextToken(); myKey.set(word); myVal.set(f_id); context.write(myKey, myVal); } else { StringTokenizer st = new StringTokenizer(value.toString()); String word = st.nextToken(); String f_id = st.nextToken(); StringBuilder builder = new StringBuilder(dlt); while (st.hasMoreTokens()) { String filename = st.nextToken(); String tf_idf = st.nextToken(); builder.append(filename); builder.append(dlt); builder.append(tf_idf); builder.append("\t"); } myKey.set(word); myVal.set(builder.toString()); context.write(myKey, myVal); } }
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); StringTokenizer tokenizer = new StringTokenizer(line); while (tokenizer.hasMoreTokens()) { word.set(tokenizer.nextToken()); if (word.toString().length() == 7) { context.write(word, one); } } }
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; boolean filtered = true; String keypair = key.toString(); String[] keys = keypair.split(" "); if (keys.length == 1) { filtered = false; } else { if (keys[0].equals("*") || keys[1].equals("*")) { filtered = false; } } if (!filtered) { for (IntWritable val : values) { sum += val.get(); } context.write(key, new IntWritable(sum)); return; } for (IntWritable val : values) { if (val.get() == -1) { filtered = false; continue; } sum += val.get(); } // filter non-needed events if (filtered) return; context.write(key, new IntWritable(sum)); }
public void map( LongWritable key, Text value, OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException { String line = value.toString(); if (validate(line, reporter)) output.collect(key, value); }
@Override protected void reduce(Text key, Iterable<Text> vals, Context ctx) { String name = null; String year = null; for (Text xx : vals) { String[] parts = xx.toString().split("="); if (parts[0].equals("name")) { name = parts[1]; } if (parts[0].equals("year")) { year = parts[1]; } } try { if (name != null && year != null) { ctx.write(new Text(year), new Text(name)); } } catch (Exception ee) { ee.printStackTrace(System.err); throw new Error("I give up"); } }
// specify input and out keys public void map( LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { String line = value.toString(); // define new variable to be string ArrayList<Integer> range = new ArrayList<Integer>(); for (int i = 2000; i <= 2010; i++) { range.add(i); } // String[] inputs = line.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)"); String[] inputs = line.split(","); try { int year = Integer.parseInt(inputs[165]); if (range.contains(year)) { String dur = inputs[3]; String artist_name = inputs[2]; String song_title = inputs[1]; String final_input = artist_name + ',' + dur + ',' + song_title; Final_Value.set(final_input); output.collect(Final_Value, dummy); } } catch (NumberFormatException e) { // do nothing } }
@Override public void reduce( Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { String line = ""; String header = ""; TreeMap<String, String> ciudades = new TreeMap<String, String>(); // Obtenemos los datos y los metemos a un treemap para que los ordene por ciudad while (values.hasNext()) { String[] tmp = values.next().toString().split(","); String ciudad = tmp[0]; String mes = tmp[1]; String temperatura = tmp[2]; String fecha = tmp[3]; ciudades.put(ciudad, tmp[1] + "," + tmp[2] + "," + tmp[3]); } // Recorremos las ciudades y vamos imprimiendo for (String ciudad : ciudades.keySet()) { header += ciudad + ",,"; String[] temporal = ciudades.get(ciudad).split(","); line += temporal[2] + "," + temporal[1] + ","; } if (c == 0) { // Imprimimos cabezera output.collect(new Text("Año,"), new Text(header)); c++; } output.collect(new Text(key.toString() + ","), new Text(line)); }
public void map( LongWritable key, Text value, OutputCollector<IntWritable, DoubleWritable> output, Reporter reporter) throws IOException { /* * It implements the mapper. It outputs the numbers of weight and updated weights. * * Note that the format of intermediate output is <IntWritable, DoubleWritable>, * because the key is the number of weight (an integer), and the value is the weight's value (double) */ inputData = value.toString(); // go through the process initialize(); getposphase(); getnegphase(); update(); // output the intermediate data // The <key, value> pairs are <weightID, weightUpdate> double[][] vishidinc_array = vishidinc.getArray(); for (int i = 0; i < numdims; i++) { for (int j = 0; j < numhid; j++) { weightPos.set(i * numhid + j); weightValue.set(vishidinc_array[i][j]); output.collect(weightPos, weightValue); } } }
@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); StringTokenizer tokenizer = new StringTokenizer(line, " "); int dim = tokenizer.countTokens(); double[] coords = new double[dim]; for (int i = 0; i < dim; i++) { coords[i] = Double.valueOf(tokenizer.nextToken()); } Point point = new Point(coords); Centroid nearest = null; double nearestDistance = Double.MAX_VALUE; for (Centroid c : centers) { double dist = point.euclideanDistance(c); if (nearest == null) { nearest = c; nearestDistance = dist; } else { if (dist < nearestDistance) { nearest = c; nearestDistance = dist; } } } context.write(nearest, point); }
// Identity public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { System.out.println("I'm in Job1 reduce"); for (Text val : values) { System.out.println("job1:redInp:-" + val.toString()); context.write(new Text(""), val); } }
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] arr = line.split(","); word.set(arr[0]); context.write(word, one); }
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { System.out.println("HELLO" + key + value); String line = value.toString(); String[] components = line.split("\\s+"); context.write(new Text(components[0]), new Text(components[1])); }
@Test public void get() throws Exception { // vv MapFileSeekTest Text value = new Text(); reader.get(new IntWritable(496), value); assertThat(value.toString(), is("One, two, buckle my shoe")); // ^^ MapFileSeekTest }
public void map(LongWritable key, Text value, OutputCollector output, Reporter reporter) throws IOException { String line = value.toString(); StringTokenizer tokenizer = new StringTokenizer(line); while (tokenizer.hasMoreTokens()) { word.set(tokenizer.nextToken()); output.collect(word, one); } }
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { Text word = new Text(); StringTokenizer s = new StringTokenizer(value.toString()); while (s.hasMoreTokens()) { word.set(s.nextToken()); context.write(word, one); } }
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] currentUserTuples = line.split("::"); int currentAgeValue = Integer.parseInt(currentUserTuples[2].trim()); if (currentUserTuples[1].trim().equalsIgnoreCase("M") && currentAgeValue <= targetAge) { context.write(new Text("UserId :: " + currentUserTuples[0].trim()), one); } }
public void reduce( Text key, Iterator<CrawlDatum> values, OutputCollector<Text, CrawlDatum> output, Reporter reporter) throws IOException { boolean oldSet = false; boolean injectedSet = false; while (values.hasNext()) { CrawlDatum val = values.next(); if (val.getStatus() == CrawlDatum.STATUS_INJECTED) { injected.set(val); injected.setStatus(CrawlDatum.STATUS_DB_UNFETCHED); injectedSet = true; } else { old.set(val); oldSet = true; } } CrawlDatum res = null; /** * Whether to overwrite, ignore or update existing records * * @see https://issues.apache.org/jira/browse/NUTCH-1405 */ // Injected record already exists and overwrite but not update if (injectedSet && oldSet && overwrite) { res = injected; if (update) { LOG.info(key.toString() + " overwritten with injected record but update was specified."); } } // Injected record already exists and update but not overwrite if (injectedSet && oldSet && update && !overwrite) { res = old; old.putAllMetaData(injected); old.setScore(injected.getScore() != scoreInjected ? injected.getScore() : old.getScore()); old.setFetchInterval( injected.getFetchInterval() != interval ? injected.getFetchInterval() : old.getFetchInterval()); } // Old default behaviour if (injectedSet && !oldSet) { res = injected; } else { res = old; } output.collect(key, res); }
@Override protected void reduce( Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException { BigDecimal sum = new BigDecimal(0); for (Text val : values) { sum = sum.add(new BigDecimal(val.toString())); } context.write(key, new Text(sum.toString())); }
public void map( IntWritable key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { String dataRow = value.toString(); StringTokenizer tk = new StringTokenizer(dataRow); String label = tk.nextToken(); String image = tk.nextToken(); dataString.set(label + "\t" + image); output.collect(sameKey, dataString); }
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { // read in a document (point in 58-dimensional space) List<Double> p = GetPoint(value.toString()); int idxClosestCentoid = IndexOfClosestCentroid(p); context.write(new IntWritable(idxClosestCentoid), value); }
@Override public void map(Object key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); StringTokenizer tokenizer = new StringTokenizer(line); int movieId = Integer.parseInt(tokenizer.nextToken()); while (tokenizer.hasMoreTokens()) { String word = tokenizer.nextToken(); context.write(new Text("1"), new IntWritable(1)); } }
/* * Finds a full file and sets it as the value. */ public synchronized boolean next(LongWritable key, Text value) throws IOException { Text line = new Text(); boolean retrieved = true; String result = ""; value.clear(); while (retrieved) { retrieved = recordReader.next(key, line); if (line.toString().length() > 0) { String lineValue = line.toString(); result += lineValue + "\n"; } } value.set(result); return true; }
@Override public void map(Text key, Text value, Context context) throws IOException, InterruptedException { Float avg = Float.parseFloat(value.toString()); String word = key.toString(); String[] parts = word.split("-"); String newKey = parts[0] + "-" + parts[1]; String carrier = parts[2]; if (!carrierAvgDelay.containsKey(newKey)) { TreeSet<Pair<Float, String>> avgDelayMap = new TreeSet<Pair<Float, String>>(); carrierAvgDelay.put(newKey, avgDelayMap); } TreeSet<Pair<Float, String>> ts = carrierAvgDelay.get(newKey); ts.add(new Pair<Float, String>(avg, carrier)); if (ts.size() > 10) { ts.remove(ts.last()); } carrierAvgDelay.put(newKey, ts); }
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { String input[]; double result = 0.0; /* adds all the values corresponding to a key */ for (Text value : values) { result += Double.parseDouble(value.toString()); } context.write(null, new Text(key + "," + Double.toString(result))); }
public void map( LongWritable key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { String arr[] = value.toString().split("\\r?\\n"); for (String row : arr) { if (row.startsWith("\"")) { continue; } String parts[] = row.split(","); output.collect(new IntWritable(new Integer(parts[1])), new Text(parts[4])); } }
public void testInputFormat() { try { JobConf conf = new JobConf(); String TMP_DIR = System.getProperty("test.build.data", "/tmp"); Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile"); SequenceFile.Writer sfw = SequenceFile.createWriter( FileSystem.getLocal(conf), conf, filename, ChukwaArchiveKey.class, ChunkImpl.class, SequenceFile.CompressionType.NONE, Reporter.NULL); StringBuilder buf = new StringBuilder(); int offsets[] = new int[lines.length]; for (int i = 0; i < lines.length; ++i) { buf.append(lines[i]); buf.append("\n"); offsets[i] = buf.length() - 1; } ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0); ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null); val.setRecordOffsets(offsets); sfw.append(key, val); sfw.append(key, val); // write it twice sfw.close(); long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen(); InputSplit split = new FileSplit(filename, 0, len, (String[]) null); ChukwaInputFormat in = new ChukwaInputFormat(); RecordReader<LongWritable, Text> r = in.getRecordReader(split, conf, Reporter.NULL); LongWritable l = r.createKey(); Text line = r.createValue(); for (int i = 0; i < lines.length * 2; ++i) { boolean succeeded = r.next(l, line); assertTrue(succeeded); assertEquals(i, l.get()); assertEquals(lines[i % lines.length], line.toString()); System.out.println("read line: " + l.get() + " " + line); } boolean succeeded = r.next(l, line); assertFalse(succeeded); } catch (IOException e) { e.printStackTrace(); fail("IO exception " + e); } }
public void map( LongWritable key, Text value, OutputCollector<IntWritable, IntWritable> output, Reporter reporter) throws IOException { String line = value.toString(); IntWritable clave = new IntWritable(); IntWritable valor = new IntWritable(); clave.set(Integer.parseInt(line)); valor.set(Integer.parseInt(line) + 1); output.collect(clave, valor); }
public static String format(Object o, String defaultHTML) { String s = Text.toString(o, null); if (Text.isEmpty(s)) { s = defaultHTML; } else { s = s.trim(); s = HTML.escape(s); s = escapeNotAscii(s); } return s; }
public void map( LongWritable key, Text value, OutputCollector<DoubleWritable, DoubleWritable> output, Reporter reporter) throws IOException { String line = value.toString(); DoubleWritable clave = new DoubleWritable(); DoubleWritable valor = new DoubleWritable(); clave.set(Double.parseDouble(line)); valor.set(Math.sqrt(Double.parseDouble(line))); output.collect(clave, valor); }
public void map( Text key, Text val, org.apache.hadoop.mapreduce.Mapper<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException { int i = 0, n = 0, j = 0, lj = 0, hj = 0; String tem = ""; initStopWordsMap(); // initialize the stop list String line = val.toString(); StringTokenizer itr = new StringTokenizer(line.toLowerCase(), tokenDelimiter); // set delimiter n = itr.countTokens(); cache = new String[n]; for (i = 0; i < n; i++) { cache[i] = new String(""); // initialize the cache } i = 0; while (itr.hasMoreTokens()) { cache[i] = itr.nextToken(); // padding the cache with the words of the content i++; } for (i = 0; i < n; i++) { keyWord = cache[i]; keyWord = keyWord.trim(); if (!hmStopWord.containsKey(keyWord)) { lj = i - 10; hj = i + 10; if (lj < 0) lj = 0; if (hj > n) hj = n; tem = " "; for (j = lj; j < hj; j++) tem += cache[j] + " "; location = new Text(); location.set(key.toString() + tem); context.write(new Text(keyWord), location); } } }