/**
 * Emits one record per input line, keyed by the line's first token.
 *
 * <p>When the name of the input file's grandparent directory equals the {@code train_file}
 * configuration value, the record is (first token, second token). For every other input the
 * second token is skipped and the value is a string that starts with {@code dlt} and then holds
 * one {@code <token><dlt><token>} entry, each followed by a tab, for every remaining pair of
 * tokens.
 *
 * @param key input key (unused)
 * @param value one whitespace-separated input line
 * @param context task context used to find the input split, read the configuration and emit
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  FileSplit split = (FileSplit) context.getInputSplit();
  String sourceDir = split.getPath().getParent().getParent().getName();
  String trainDir = context.getConfiguration().get("train_file");

  // Both kinds of input share the same two leading columns.
  StringTokenizer tokens = new StringTokenizer(value.toString());
  String term = tokens.nextToken();
  String id = tokens.nextToken();

  if (sourceDir.equals(trainDir)) {
    myKey.set(term);
    myVal.set(id);
    context.write(myKey, myVal);
    return;
  }

  // Remaining tokens are consumed two at a time.
  StringBuilder entries = new StringBuilder(dlt);
  while (tokens.hasMoreTokens()) {
    String filename = tokens.nextToken();
    String weight = tokens.nextToken();
    entries.append(filename).append(dlt).append(weight).append("\t");
  }
  myKey.set(term);
  myVal.set(entries.toString());
  context.write(myKey, myVal);
}
/**
 * Converts a text file of centroids into a sequence file of (Centroid, IntWritable) records.
 *
 * <p>Each non-blank input line is split on single spaces: the first token is parsed as the
 * integer cluster id and every following token as one coordinate. Every centroid is written with
 * the value {@code 0}. An existing file at {@code sequenceFilePath} is deleted first.
 *
 * @param conf configuration handed to the sequence-file writer
 * @param fs file system used for both the input and the output file
 * @param centroidsPath path of the text file to read
 * @param sequenceFilePath path of the sequence file to (re)create
 * @throws Exception if reading, parsing or writing fails
 */
public static void createCentersSequenceFile(
    Configuration conf, FileSystem fs, String centroidsPath, String sequenceFilePath)
    throws Exception {
  Path seqFile = new Path(sequenceFilePath);
  if (fs.exists(seqFile)) {
    fs.delete(seqFile, true);
  }

  java.io.BufferedReader reader = null;
  SequenceFile.Writer writer = null;
  try {
    // Read through a BufferedReader: available() > 0 is not a reliable end-of-file test and
    // DataInputStream.readLine() is deprecated.
    reader =
        new java.io.BufferedReader(
            new java.io.InputStreamReader(fs.open(new Path(centroidsPath)), "UTF-8"));
    writer = SequenceFile.createWriter(fs, conf, seqFile, Centroid.class, IntWritable.class);

    IntWritable value = new IntWritable(0);
    String line;
    while ((line = reader.readLine()) != null) {
      StringTokenizer tokenizer = new StringTokenizer(line, " ");
      if (!tokenizer.hasMoreTokens()) {
        continue; // blank line (e.g. a trailing newline): nothing to parse
      }
      int dim = tokenizer.countTokens() - 1;
      int clusterId = Integer.parseInt(tokenizer.nextToken());
      double[] coords = new double[dim];
      for (int i = 0; i < dim; i++) {
        coords[i] = Double.parseDouble(tokenizer.nextToken());
      }
      writer.append(new Centroid(clusterId, new Point(coords)), value);
    }
  } finally {
    // Release both streams even when parsing or writing fails part-way through.
    IOUtils.closeStream(writer);
    IOUtils.closeStream(reader);
  }
}
/**
 * Parses a space-separated line of coordinates into a point and emits it keyed by the centroid
 * in {@code centers} with the smallest {@code euclideanDistance} to it. On ties the centroid
 * encountered first wins. If {@code centers} yields no elements the key passed to the context is
 * {@code null}.
 *
 * @param key input key (unused)
 * @param value one line of space-separated numbers
 * @param context task context used to emit the (centroid, point) pair
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  StringTokenizer fields = new StringTokenizer(value.toString(), " ");
  double[] coords = new double[fields.countTokens()];
  for (int i = 0; i < coords.length; i++) {
    coords[i] = Double.parseDouble(fields.nextToken());
  }
  Point point = new Point(coords);

  Centroid best = null;
  double bestDistance = Double.MAX_VALUE;
  for (Centroid candidate : centers) {
    double distance = point.euclideanDistance(candidate);
    // The first candidate is always taken; later ones only when strictly closer.
    if (best == null || distance < bestDistance) {
      best = candidate;
      bestDistance = distance;
    }
  }
  context.write(best, point);
}
/**
 * Emits (first column, last column parsed as an int) for one tab-separated line.
 *
 * <p>A line with a single column leaves nothing to parse, so {@link Integer#parseInt(String)} is
 * called with {@code null} and throws {@link NumberFormatException}.
 *
 * @param key input key (unused)
 * @param value one tab-separated input line
 * @param output collector receiving the (year, averagePrice) pair
 * @param reporter progress reporter (unused)
 * @throws IOException if the collector fails
 */
public void map(
    LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
    throws IOException {
  StringTokenizer columns = new StringTokenizer(value.toString(), "\t");
  String year = columns.nextToken();

  // Walk to the end of the line, remembering only the final column.
  String last = null;
  for (; columns.hasMoreTokens(); ) {
    last = columns.nextToken();
  }

  int averagePrice = Integer.parseInt(last);
  output.collect(new Text(year), new IntWritable(averagePrice));
}
@Override public void map( LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { String line = value.toString(); StringTokenizer tokens = new StringTokenizer(line); String[] keys = tokens.nextToken().toString().split("-"); String date_temp = tokens.nextToken(); String country = keys[0]; String year = keys[1]; // Mandamos año y un iterable de [ciudad,mes,temperatura,fecha] output.collect(new Text(year), new Text(country + "," + date_temp)); }
/**
 * Emits ({@code word}, {@code one}) once for every whitespace-separated token of the line.
 *
 * @param key input key (unused)
 * @param value one input line
 * @param output collector receiving a pair per token
 * @param reporter progress reporter (unused)
 * @throws IOException if the collector fails
 */
public void map(LongWritable key, Text value, OutputCollector output, Reporter reporter)
    throws IOException {
  // NOTE(review): OutputCollector is used as a raw type here; the type arguments of the
  // enclosing class are not visible from this method, so the signature is left as-is.
  for (StringTokenizer tokens = new StringTokenizer(value.toString());
      tokens.hasMoreTokens(); ) {
    word.set(tokens.nextToken());
    output.collect(word, one);
  }
}
/**
 * Writes ({@code word}, {@code one}) to the context once for every whitespace-separated token of
 * the line. The shared {@code word} object is overwritten for each token.
 *
 * @param key input key (unused)
 * @param value one input line
 * @param context task context receiving a pair per token
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  StringTokenizer tokens = new StringTokenizer(value.toString());
  int remaining = tokens.countTokens();
  for (int i = 0; i < remaining; i++) {
    word.set(tokens.nextToken());
    context.write(word, one);
  }
}
/**
 * Writes (token, {@code one}) to the context for every whitespace-separated token of the line,
 * reusing a single method-local {@code Text} instance as the key.
 *
 * @param key input key (unused)
 * @param value one input line
 * @param context task context receiving a pair per token
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  Text word = new Text();
  for (StringTokenizer tokens = new StringTokenizer(value.toString());
      tokens.hasMoreTokens(); ) {
    String token = tokens.nextToken();
    word.set(token);
    context.write(word, one);
  }
}
/**
 * Re-emits the first two whitespace-separated tokens of the row, joined by a tab, under the
 * shared key {@code sameKey}. Any further tokens on the row are ignored.
 *
 * @param key input key (unused)
 * @param value input row holding at least two tokens
 * @param output collector receiving ({@code sameKey}, "first\tsecond")
 * @param reporter progress reporter (unused)
 * @throws IOException if the collector fails
 */
public void map(
    IntWritable key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
    throws IOException {
  StringTokenizer fields = new StringTokenizer(value.toString());
  String label = fields.nextToken();
  String image = fields.nextToken();

  StringBuilder row = new StringBuilder(label);
  row.append("\t").append(image);

  dataString.set(row.toString());
  output.collect(sameKey, dataString);
}
/**
 * Parses a whitespace-separated list of numbers.
 *
 * @param s text whose tokens are each parseable by {@link Double#parseDouble(String)}
 * @return the parsed values in input order; empty when {@code s} holds no tokens
 * @throws NumberFormatException if any token is not a valid double
 */
public static List<Double> GetPoint(String s) {
  StringTokenizer tokens = new StringTokenizer(s);
  // Initial capacity only; the list grows as needed.
  List<Double> point = new ArrayList<Double>(58);
  int count = tokens.countTokens();
  for (int i = 0; i < count; i++) {
    String token = tokens.nextToken();
    point.add(Double.parseDouble(token));
  }
  return point;
}
/**
 * Builds one inverted list for {@code key} and writes it to the context.
 *
 * <p>Each value is split on whitespace: the first token becomes {@code url} (empty when the
 * value has no tokens) and all remaining tokens are re-joined into {@code abs}, each followed by
 * a single space. A {@code ListURL} built from the two is added to the list, which is sorted with
 * {@code quickSortNodeKey()} before being emitted.
 *
 * @param key the reduce key, written through unchanged
 * @param values the values grouped under {@code key}
 * @param context task context receiving (key, inverted list)
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
public void reduce(
    Text key,
    Iterable<Text> values,
    org.apache.hadoop.mapreduce.Reducer<Text, Text, Text, InvertedListWritable>.Context context)
    throws IOException, InterruptedException {
  InvertedListWritable postings = new InvertedListWritable();

  for (Text entry : values) {
    StringTokenizer tokens = new StringTokenizer(entry.toString());
    url = tokens.hasMoreTokens() ? tokens.nextToken() : "";

    StringBuilder rest = new StringBuilder();
    while (tokens.hasMoreTokens()) {
      rest.append(tokens.nextToken()).append(" ");
    }
    abs = rest.toString();

    dr = new ListURL(url, abs);
    postings.paddingValueKey(dr);
  }

  postings.quickSortNodeKey();
  context.write(key, postings);
}
/**
 * Computes one updated vector entry from a {@code <rowIdx> <xValue>} line and emits it.
 *
 * <p>The emitted value is {@code (resVec[rowIdx] - sumVec[rowIdx] * xValue) / diaVec[rowIdx]},
 * keyed by {@code rowIdx}. Lines without any token are skipped.
 *
 * @param key input key (unused)
 * @param value one whitespace-separated line holding a row index and a value
 * @param output collector receiving (rowIdx, result)
 * @param reporter progress reporter (unused)
 * @throws IOException if the collector fails
 */
public void map(
    LongWritable key,
    Text value,
    OutputCollector<IntWritable, DoubleWritable> output,
    Reporter reporter)
    throws IOException {
  StringTokenizer tokenizer = new StringTokenizer(value.toString());
  if (!tokenizer.hasMoreTokens()) {
    // Blank line: emitting here would publish a made-up result for row 0 (computed from
    // rowIdx = 0 and xValue = 0) next to the real row-0 record.
    return;
  }

  int rowIdx = Integer.parseInt(tokenizer.nextToken());
  double xValue = Double.parseDouble(tokenizer.nextToken());

  double xResult = (resVec[rowIdx] - (sumVec[rowIdx] * xValue)) / diaVec[rowIdx];
  output.collect(new IntWritable(rowIdx), new DoubleWritable(xResult));
}
/**
 * Groups the incoming rows into {@code exampleCount / batchSize} batches and emits the matrix
 * bundle after each row is loaded.
 *
 * <p>Every row is split on whitespace: the first token is parsed as a double into {@code label},
 * and each character of the second token is stored, as its numeric char value, into the current
 * batch row of {@code vdata}. After each row a {@code jBLASArrayWritable} wrapping the list
 * (weights, hbias, vbias, label, vdata, null hidden chain) is emitted under the key
 * {@code "1\t" + batchIndex}.
 *
 * @param sameNum the reduce key (unused)
 * @param data rows of the form {@code <label> <image>}
 * @param output collector receiving (batch id, matrix bundle)
 * @param reporter progress reporter (unused)
 * @throws IOException if the collector fails
 */
public void reduce(
    IntWritable sameNum,
    Iterator<Text> data,
    OutputCollector<Text, jBLASArrayWritable> output,
    Reporter reporter)
    throws IOException {
  int totalBatchCount = exampleCount / batchSize;

  DoubleMatrix weights = DoubleMatrix.randn(hiddenNodes, visibleNodes);
  DoubleMatrix hbias = DoubleMatrix.zeros(hiddenNodes);
  DoubleMatrix vbias = DoubleMatrix.zeros(visibleNodes);
  DoubleMatrix label = DoubleMatrix.zeros(1);
  DoubleMatrix hidden_chain = null; // placeholder slot; stays null in this method
  DoubleMatrix vdata = DoubleMatrix.zeros(batchSize, visibleNodes);

  ArrayList<DoubleMatrix> outputmatricies = new ArrayList<DoubleMatrix>();
  outputmatricies.add(weights);
  outputmatricies.add(hbias);
  outputmatricies.add(vbias);
  outputmatricies.add(label);
  outputmatricies.add(vdata);
  outputmatricies.add(hidden_chain);

  for (int i = 0; i < totalBatchCount; i++) {
    int row = 0;
    while (data.hasNext() && row < batchSize) {
      StringTokenizer tk = new StringTokenizer(data.next().toString());
      label.put(0, Double.parseDouble(tk.nextToken()));
      String image = tk.nextToken();

      // Write into rows 0 .. batchSize-1. Incrementing before the write skipped row 0 and
      // addressed row batchSize, which lies outside the batchSize x visibleNodes matrix.
      for (int k = 0; k < image.length(); k++) {
        // Stores the char's numeric value ('0' -> 48.0), exactly as before.
        // NOTE(review): confirm the char code, not the digit value, is what is wanted.
        vdata.put(row, k, (double) image.charAt(k));
      }
      row++;

      // NOTE(review): this emits once per row rather than once per completed batch - confirm.
      dataArray = new jBLASArrayWritable(outputmatricies);
      batchID.set("1\t" + i);
      output.collect(batchID, dataArray);
    }
  }
}
/**
 * Makes a path relative with respect to a root path.
 *
 * <p>{@code absPath} is expected to descend from {@code root}. The two paths are compared
 * component by component (split on {@code "/"}); if a component differs, or {@code absPath} has
 * fewer components than {@code root}, {@code null} is returned.
 *
 * @param root the root path
 * @param absPath an absolute path
 * @return the remainder of {@code absPath} below {@code root}, joined with
 *     {@link Path#SEPARATOR}; {@code "."} when both paths have the same components; {@code null}
 *     when {@code absPath} does not descend from {@code root}
 * @throws IllegalArgumentException if {@code absPath} is not absolute
 */
static String makeRelative(Path root, Path absPath) {
  if (!absPath.isAbsolute()) {
    throw new IllegalArgumentException("!absPath.isAbsolute(), absPath=" + absPath);
  }
  String p = absPath.toUri().getPath();
  StringTokenizer pathTokens = new StringTokenizer(p, "/");
  for (StringTokenizer rootTokens = new StringTokenizer(root.toUri().getPath(), "/");
      rootTokens.hasMoreTokens(); ) {
    // A path shorter than root cannot descend from it; report that as null rather than letting
    // nextToken() throw NoSuchElementException.
    if (!pathTokens.hasMoreTokens()
        || !rootTokens.nextToken().equals(pathTokens.nextToken())) {
      return null;
    }
  }

  StringBuilder sb = new StringBuilder();
  while (pathTokens.hasMoreTokens()) {
    sb.append(pathTokens.nextToken());
    if (pathTokens.hasMoreTokens()) {
      sb.append(Path.SEPARATOR);
    }
  }
  return sb.length() == 0 ? "." : sb.toString();
}
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { String[] pair = new String[2]; int count = 0; for (Text txt : values) { pair[count] = txt.toString(); count++; } // word exists in training if (count == 2) { StringTokenizer st_one, st_two; if (pair[0].contains(dlt)) { st_one = new StringTokenizer(pair[1]); st_two = new StringTokenizer(pair[0]); } else { st_one = new StringTokenizer(pair[0]); st_two = new StringTokenizer(pair[1]); } // outputting the data String f_id = st_one.nextToken(); StringBuilder builder = new StringBuilder(dlt); builder.append(f_id); builder.append(dlt); while (st_two.hasMoreTokens()) { String filename = st_two.nextToken(); String tf_idf = st_two.nextToken(); builder.append(filename); builder.append(dlt); builder.append(tf_idf); builder.append("\t"); } myVal.set(builder.toString()); context.write(key, myVal); } }
/**
 * Parses a space-separated line of coordinates into a point and emits it keyed by a centroid
 * picked from {@code centers} at an index drawn from {@code rand}.
 *
 * @param key input key (unused)
 * @param value one line of space-separated numbers
 * @param context task context used to emit the (centroid, point) pair
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  StringTokenizer fields = new StringTokenizer(value.toString(), " ");
  double[] coords = new double[fields.countTokens()];
  for (int i = 0; i < coords.length; i++) {
    coords[i] = Double.parseDouble(fields.nextToken());
  }

  int pick = rand.nextInt(centers.size());
  Centroid center = centers.get(pick);
  context.write(center, new Point(coords));
}
/**
 * Loads the matrix from the first distributed-cache file into {@code sumVec}, {@code resVec}
 * and {@code diaVec}, each sized by the {@code DIMENTION} configuration value.
 *
 * <p>Every non-blank line is read as {@code <row> <col> <value>}:
 *
 * <ul>
 *   <li>{@code row == col}: the value is stored in {@code diaVec[row]};
 *   <li>otherwise {@code col == nsize}: the value is stored in {@code resVec[row]};
 *   <li>otherwise the value is added to {@code sumVec[row]}.
 * </ul>
 *
 * @param conf job configuration providing {@code DIMENTION} and the cache file list
 * @throws RuntimeException if the cache file cannot be read; continuing would leave all three
 *     vectors zeroed
 */
public void configure(JobConf conf) {
  nsize = conf.getInt("DIMENTION", 0);
  // Freshly allocated double arrays are already zero-filled.
  sumVec = new double[nsize];
  resVec = new double[nsize];
  diaVec = new double[nsize];

  java.io.BufferedReader reader = null;
  try {
    URI[] fvector = DistributedCache.getCacheFiles(conf);
    Path vInput = new Path(fvector[0].getPath());
    // NOTE(review): the name-node URI is hard-coded; consider taking it from the configuration.
    FileSystem fs = FileSystem.get(URI.create("hdfs://node17.cs.rochester.edu:9000"), conf);
    reader = new java.io.BufferedReader(new java.io.InputStreamReader(fs.open(vInput), "UTF-8"));

    String line;
    while ((line = reader.readLine()) != null) {
      StringTokenizer tokenizer = new StringTokenizer(line);
      if (!tokenizer.hasMoreTokens()) {
        continue; // blank line: nothing to parse
      }
      int rowIdx = Integer.parseInt(tokenizer.nextToken());
      int colIdx = Integer.parseInt(tokenizer.nextToken());
      double matVar = Double.parseDouble(tokenizer.nextToken());
      if (rowIdx == colIdx) {
        diaVec[rowIdx] = matVar;
      } else if (colIdx == nsize) {
        resVec[rowIdx] = matVar;
      } else {
        sumVec[rowIdx] += matVar;
      }
    }
  } catch (IOException e) {
    // Fail the task instead of carrying on with all-zero vectors.
    throw new RuntimeException("Failed to load matrix from the distributed cache", e);
  } finally {
    if (reader != null) {
      try {
        reader.close();
      } catch (IOException ignored) {
        // Read-only stream: a failure while closing cannot lose data.
      }
    }
  }
}
/**
 * Implements the map-method of the Mapper-interface: emits ({@code word}, {@code one}) for every
 * whitespace-separated token of the line that is longer than three characters.
 *
 * @param key input key (unused)
 * @param value one input line
 * @param output collector receiving a pair per qualifying token
 * @param reporter progress reporter (unused)
 * @throws IOException if the collector fails
 */
@Override
public void map(
    LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
    throws IOException {
  for (StringTokenizer tokens = new StringTokenizer(value.toString());
      tokens.hasMoreTokens(); ) {
    String candidate = tokens.nextToken();
    if (candidate.length() <= 3) {
      continue; // short tokens are not counted
    }
    word.set(candidate);
    output.collect(word, one);
  }
}
/**
 * Driver: sizes the problem, writes the initial vector and runs the job repeatedly.
 *
 * <ol>
 *   <li>Reads the file named by {@code args[0]} and counts the lines whose first two integer
 *       tokens are equal; that count is stored in the configuration as {@code DIMENTION}.
 *   <li>Writes {@code preX/Result} with one {@code "<index> 0"} line per counted row.
 *   <li>Adds {@code args[0]} to the distributed cache.
 *   <li>Runs {@code Jacobi} through {@code ToolRunner} until {@code stopIteration(conf)} returns
 *       true or the iteration counter reaches {@code max_iter}.
 * </ol>
 *
 * @param args command-line arguments; {@code args[0]} is the matrix file
 * @throws Exception if file access, parsing or a job run fails
 */
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);

  int dimention = 0;
  BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(new Path(args[0]))));
  try {
    String line;
    while ((line = br.readLine()) != null) {
      StringTokenizer tokenizer = new StringTokenizer(line);
      if (!tokenizer.hasMoreTokens()) {
        continue; // blank line: nothing to parse
      }
      String iRow = tokenizer.nextToken();
      String iCol = tokenizer.nextToken();
      if (Integer.parseInt(iRow) == Integer.parseInt(iCol)) {
        dimention++;
      }
    }
  } finally {
    br.close(); // also released when a line fails to parse
  }
  conf.setInt("DIMENTION", dimention);

  BufferedWriter iniX =
      new BufferedWriter(new OutputStreamWriter(fs.create(new Path("preX/Result"))));
  try {
    for (int j = 0; j < dimention; j++) {
      iniX.write(String.valueOf(j) + " 0");
      iniX.newLine();
    }
  } finally {
    iniX.close();
  }

  URI matVec = new URI(args[0]);
  DistributedCache.addCacheFile(matVec, conf);

  // NOTE(review): with the post-increment in the condition the job can run up to
  // max_iter + 1 times; confirm whether that extra pass is intended.
  int iteration = 0;
  do {
    ToolRunner.run(conf, new Jacobi(), args);
  } while (iteration++ < max_iter && (!stopIteration(conf)));
}
public void map( Text key, Text val, org.apache.hadoop.mapreduce.Mapper<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException { int i = 0, n = 0, j = 0, lj = 0, hj = 0; String tem = ""; initStopWordsMap(); // initialize the stop list String line = val.toString(); StringTokenizer itr = new StringTokenizer(line.toLowerCase(), tokenDelimiter); // set delimiter n = itr.countTokens(); cache = new String[n]; for (i = 0; i < n; i++) { cache[i] = new String(""); // initialize the cache } i = 0; while (itr.hasMoreTokens()) { cache[i] = itr.nextToken(); // padding the cache with the words of the content i++; } for (i = 0; i < n; i++) { keyWord = cache[i]; keyWord = keyWord.trim(); if (!hmStopWord.containsKey(keyWord)) { lj = i - 10; hj = i + 10; if (lj < 0) lj = 0; if (hj > n) hj = n; tem = " "; for (j = lj; j < hj; j++) tem += cache[j] + " "; location = new Text(); location.set(key.toString() + tem); context.write(new Text(keyWord), location); } } }
/**
 * Loads the centroids from the file named by the {@code CFILE} configuration value.
 *
 * <p>Each line that contains at least one token is parsed as a whitespace-separated list of
 * doubles and appended to {@code centroids}; {@code k} is then set to the number of centroids
 * held.
 *
 * @param context task context providing the configuration
 * @throws IOException if the file cannot be opened or read
 * @throws InterruptedException declared by the overridden signature; not thrown here
 */
protected void setup(Context context) throws IOException, InterruptedException {
  FileSystem fs = FileSystem.get(context.getConfiguration());
  Path cFile = new Path(context.getConfiguration().get("CFILE"));

  BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(cFile)));
  try {
    String line;
    while ((line = reader.readLine()) != null) {
      StringTokenizer tokenizer = new StringTokenizer(line);
      if (!tokenizer.hasMoreTokens()) {
        continue; // blank line: no centroid to add
      }
      // 58 is only the initial capacity; the list grows as needed.
      List<Double> centroid = new ArrayList<Double>(58);
      while (tokenizer.hasMoreTokens()) {
        centroid.add(Double.parseDouble(tokenizer.nextToken()));
      }
      centroids.add(centroid);
    }
  } finally {
    // Release the file handle even when a line fails to parse.
    reader.close();
  }
  k = centroids.size();
}