// Map side of a train/test join: emits one record per input line, tagged
// differently depending on whether the line comes from the training file.
// myKey, myVal, and the delimiter dlt are instance fields declared elsewhere.
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  // The grandparent directory name identifies which input file this split belongs to.
  String curFile = ((FileSplit) context.getInputSplit()).getPath().getParent().getParent().getName();
  String trainFile = context.getConfiguration().get("train_file");
  if (curFile.equals(trainFile)) {
    // Training line: "word f_id" -- emit the word with its file id.
    StringTokenizer st = new StringTokenizer(value.toString());
    String word = st.nextToken();
    String fId = st.nextToken();
    myKey.set(word);
    myVal.set(fId);
    context.write(myKey, myVal);
  } else {
    // Test line: "word f_id filename tf_idf filename tf_idf ...". The second
    // token is consumed but never used; the remaining (filename, tf_idf)
    // pairs are re-joined with the dlt delimiter.
    StringTokenizer st = new StringTokenizer(value.toString());
    String word = st.nextToken();
    st.nextToken(); // skip f_id
    StringBuilder builder = new StringBuilder(dlt);
    while (st.hasMoreTokens()) {
      String filename = st.nextToken();
      String tfIdf = st.nextToken();
      builder.append(filename);
      builder.append(dlt);
      builder.append(tfIdf);
      builder.append("\t");
    }
    myKey.set(word);
    myVal.set(builder.toString());
    context.write(myKey, myVal);
  }
}
// Standard word-count map step (new org.apache.hadoop.mapreduce API): emit
// (token, 1) for every whitespace-separated token. The word and one fields
// are declared elsewhere in the class.
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String line = value.toString();
  StringTokenizer tokenizer = new StringTokenizer(line);
  while (tokenizer.hasMoreTokens()) {
    word.set(tokenizer.nextToken());
    context.write(word, one);
  }
}
// Old-API (org.apache.hadoop.mapred) version of the same word-count map step;
// the raw OutputCollector type is parameterized here to match the emitted pair.
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
    throws IOException {
  String line = value.toString();
  StringTokenizer tokenizer = new StringTokenizer(line);
  while (tokenizer.hasMoreTokens()) {
    word.set(tokenizer.nextToken());
    output.collect(word, one);
  }
}
// Word-count map variant that allocates its Text key locally instead of
// reusing a field; one is still a field declared elsewhere.
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  Text word = new Text();
  StringTokenizer s = new StringTokenizer(value.toString());
  while (s.hasMoreTokens()) {
    word.set(s.nextToken());
    context.write(word, one);
  }
}
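The three mappers above all emit (word, 1) pairs, but no reduce step appears in this listing. A minimal matching sum reducer, sketched here against the new org.apache.hadoop.mapreduce API (the class name is hypothetical, not from the original code):

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical companion reducer: sums the 1s emitted for each word.
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  private final IntWritable result = new IntWritable();

  @Override
  public void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
      sum += val.get();
    }
    result.set(sum);
    context.write(key, result); // (word, total count)
  }
}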
// Parses a whitespace-separated line of numbers into a point. The initial
// capacity of 58 merely pre-sizes the list; it does not bound the length.
public static List<Double> GetPoint(String s) {
  StringTokenizer tokenizer = new StringTokenizer(s);
  List<Double> p = new ArrayList<Double>(58);
  while (tokenizer.hasMoreTokens()) {
    p.add(Double.parseDouble(tokenizer.nextToken()));
  }
  return p;
}
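GetPoint parses one feature vector per line. The k-means setup() method near the end of this listing caches centroids in the same List<Double> form, so comparing points to centroids needs a distance function; a hypothetical helper for that (not part of the original code):

// Hypothetical helper: squared Euclidean distance between two equal-length
// points. Using the squared distance avoids a sqrt and preserves
// nearest-centroid ordering.
public static double squaredDistance(List<Double> a, List<Double> b) {
  double sum = 0.0;
  for (int i = 0; i < a.size(); i++) {
    double d = a.get(i) - b.get(i);
    sum += d * d;
  }
  return sum;
}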
public static List<InetSocketAddress> readAddresses(Path path, Configuration conf) throws IOException {
  final List<InetSocketAddress> addrs = new ArrayList<InetSocketAddress>();
  for (final String line : readConfig(path, conf)) {
    final StringTokenizer tokens = new StringTokenizer(line);
    if (tokens.hasMoreTokens()) {
      final String host = tokens.nextToken();
      if (tokens.hasMoreTokens()) {
        final String port = tokens.nextToken();
        addrs.add(new InetSocketAddress(host, Integer.parseInt(port)));
      }
    }
  }
  return addrs;
}
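readAddresses skips blank lines and ignores anything after the second token, so the expected layout is one host and port per line; a malformed port surfaces as an unchecked NumberFormatException. An illustrative input file (hostnames and port are made up):

node01.example.com 50010
node02.example.com 50010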
/** Implements the map method of the Mapper interface: emits (token, 1) only
 *  for tokens longer than three characters. */
@Override
public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
    throws IOException {
  String line = value.toString();
  StringTokenizer tokenizer = new StringTokenizer(line);
  while (tokenizer.hasMoreTokens()) {
    String token = tokenizer.nextToken();
    if (token.length() > 3) {
      word.set(token);
      output.collect(word, one);
    }
  }
}
public List<String> resolve(List<String> names) {
  List<String> m = new ArrayList<String>(names.size());
  if (names.isEmpty()) {
    return m;
  }
  if (scriptName == null) {
    for (int i = 0; i < names.size(); i++) {
      m.add(NetworkTopology.DEFAULT_RACK);
    }
    return m;
  }
  String output = runResolveCommand(names);
  if (output != null) {
    StringTokenizer allSwitchInfo = new StringTokenizer(output);
    while (allSwitchInfo.hasMoreTokens()) {
      m.add(allSwitchInfo.nextToken());
    }
    if (m.size() != names.size()) {
      // invalid number of entries returned by the script
      LOG.warn("Script " + scriptName + " returned " + m.size()
          + " values when " + names.size() + " were expected.");
      return null;
    }
  } else {
    // an error occurred. return null to signify this.
    // (exn was already logged in runResolveCommand)
    return null;
  }
  return m;
}
// One Jacobi-style update step: each input line holds "rowIdx xValue", and
// the cached vectors (resVec = right-hand side, sumVec = off-diagonal row
// sums, diaVec = diagonal entries) were loaded in configure().
public void map(LongWritable key, Text value, OutputCollector<IntWritable, DoubleWritable> output, Reporter reporter)
    throws IOException {
  String line = value.toString();
  StringTokenizer tokenizer = new StringTokenizer(line);
  int rowIdx = 0;
  double xValue = 0;
  if (tokenizer.hasMoreTokens()) {
    rowIdx = Integer.parseInt(tokenizer.nextToken());
    xValue = Double.parseDouble(tokenizer.nextToken());
  }
  double xResult = (resVec[rowIdx] - (sumVec[rowIdx] * xValue)) / diaVec[rowIdx];
  output.collect(new IntWritable(rowIdx), new DoubleWritable(xResult));
}
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);

  // Pass 1: count diagonal entries (row == col) to discover the matrix dimension.
  Path matFile = new Path(args[0]);
  FSDataInputStream matData = fs.open(matFile);
  BufferedReader br = new BufferedReader(new InputStreamReader(matData));
  int i = 0;
  String line;
  while ((line = br.readLine()) != null) {
    StringTokenizer tokenizer = new StringTokenizer(line);
    String iRow = tokenizer.nextToken();
    String iCol = tokenizer.nextToken();
    if (Integer.parseInt(iRow) == Integer.parseInt(iCol)) {
      i++;
    }
  }
  br.close();
  int dimension = i;
  conf.setInt("DIMENSION", dimension);

  // Write the initial guess x = 0 as "index 0" lines.
  Path xFile = new Path("preX/Result");
  FSDataOutputStream xData = fs.create(xFile);
  BufferedWriter iniX = new BufferedWriter(new OutputStreamWriter(xData));
  for (int j = 0; j < dimension; j++) {
    iniX.write(j + " 0");
    iniX.newLine();
  }
  iniX.close();

  // Ship the matrix file to every task via the distributed cache, then
  // iterate until convergence or until max_iter rounds have run.
  URI matVec = new URI(args[0]);
  DistributedCache.addCacheFile(matVec, conf);
  int iteration = 0;
  do {
    ToolRunner.run(conf, new Jacobi(), args);
  } while (iteration++ < max_iter && !stopIteration(conf));
}
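stopIteration(conf) is referenced above but its implementation is not part of this listing. Presumably it loads the previous and current solution vectors from HDFS and applies a convergence test along these lines (a purely illustrative sketch, not the original code; the tolerance EPS is assumed):

// Illustrative convergence test, not the original stopIteration: report
// convergence when no component of x changed by more than EPS this round.
private static final double EPS = 1e-6; // assumed tolerance

private static boolean converged(double[] prevX, double[] newX) {
  for (int i = 0; i < prevX.length; i++) {
    if (Math.abs(newX[i] - prevX[i]) > EPS) {
      return false;
    }
  }
  return true;
}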
public void configure(JobConf conf) {
  try {
    nsize = conf.getInt("DIMENSION", 0);
    sumVec = new double[nsize]; // sum of off-diagonal entries per row
    resVec = new double[nsize]; // right-hand-side vector
    diaVec = new double[nsize]; // diagonal entries
    Arrays.fill(sumVec, 0);
    Arrays.fill(resVec, 0);
    Arrays.fill(diaVec, 0);

    URI[] fvector = DistributedCache.getCacheFiles(conf);
    Path vInput = new Path(fvector[0].getPath());
    FileSystem fs = FileSystem.get(URI.create("hdfs://node17.cs.rochester.edu:9000"), conf);
    // Wrap the stream in a BufferedReader rather than calling the deprecated
    // FSDataInputStream.readLine().
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(vInput)));
    String line;
    while ((line = reader.readLine()) != null) {
      StringTokenizer tokenizer = new StringTokenizer(line);
      int rowIdx = Integer.parseInt(tokenizer.nextToken());
      int colIdx = Integer.parseInt(tokenizer.nextToken());
      double matVar = Double.parseDouble(tokenizer.nextToken());
      if (rowIdx == colIdx) {
        diaVec[rowIdx] = matVar;
      } else if (colIdx == nsize) {
        // The augmented column at index nsize holds the right-hand side.
        resVec[rowIdx] = matVar;
      } else {
        sumVec[rowIdx] += matVar;
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
}
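The parser above implies a sparse triple layout, one "row col value" entry per line, with the right-hand-side vector stored as an extra column whose index equals nsize. Illustrative lines, assuming nsize = 3:

0 0 4.0
0 1 -1.0
0 3 7.0

The first line is the diagonal entry for row 0, the second an off-diagonal entry accumulated into sumVec[0], and the third (column index 3 == nsize) the right-hand-side component stored in resVec[0].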
// Loads the current centroid list from the file named by the CFILE property;
// centroids and k are instance fields declared elsewhere in the class.
protected void setup(Context context) throws IOException, InterruptedException {
  FileSystem fs = FileSystem.get(context.getConfiguration());
  Path cFile = new Path(context.getConfiguration().get("CFILE"));
  DataInputStream d = new DataInputStream(fs.open(cFile));
  BufferedReader reader = new BufferedReader(new InputStreamReader(d));
  String line;
  // Each non-blank line of the centroid file is one whitespace-separated centroid.
  while ((line = reader.readLine()) != null) {
    StringTokenizer tokenizer = new StringTokenizer(line);
    if (tokenizer.hasMoreTokens()) {
      List<Double> centroid = new ArrayList<Double>(58);
      while (tokenizer.hasMoreTokens()) {
        centroid.add(Double.parseDouble(tokenizer.nextToken()));
      }
      centroids.add(centroid);
    }
  }
  k = centroids.size();
}
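setup() only caches the centroids; the corresponding map step is not shown in this listing. A minimal sketch of the usual k-means assignment it prepares for, reusing GetPoint and the hypothetical squaredDistance helper from earlier (this is an assumption about the missing code, not the original):

// Hypothetical k-means assignment step: emit (index of nearest centroid, input line).
// Relies on the centroids and k fields populated in setup().
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  List<Double> point = GetPoint(value.toString());
  int nearest = 0;
  double best = Double.MAX_VALUE;
  for (int i = 0; i < k; i++) {
    double d = squaredDistance(point, centroids.get(i));
    if (d < best) {
      best = d;
      nearest = i;
    }
  }
  context.write(new IntWritable(nearest), value);
}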
// Reduce side of the train/test join produced by the first mapper in this
// listing. Each key is assumed to arrive with at most two values: one from
// the training file (a bare f_id) and one from the test side (a
// dlt-delimited pair list).
public void reduce(Text key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  String[] pair = new String[2];
  int count = 0;
  for (Text txt : values) {
    pair[count] = txt.toString();
    count++;
  }
  // word exists in training: emit only when both sides are present
  if (count == 2) {
    StringTokenizer stOne, stTwo;
    // The value containing the delimiter is the test-side pair list.
    if (pair[0].contains(dlt)) {
      stOne = new StringTokenizer(pair[1]);
      stTwo = new StringTokenizer(pair[0]);
    } else {
      stOne = new StringTokenizer(pair[0]);
      stTwo = new StringTokenizer(pair[1]);
    }
    // outputting the data
    String fId = stOne.nextToken();
    StringBuilder builder = new StringBuilder(dlt);
    builder.append(fId);
    builder.append(dlt);
    while (stTwo.hasMoreTokens()) {
      String filename = stTwo.nextToken();
      String tfIdf = stTwo.nextToken();
      builder.append(filename);
      builder.append(dlt);
      builder.append(tfIdf);
      builder.append("\t");
    }
    myVal.set(builder.toString());
    context.write(key, myVal);
  }
}