// specify input and out keys public void map( LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { String line = value.toString(); // define new variable to be string ArrayList<Integer> range = new ArrayList<Integer>(); for (int i = 2000; i <= 2010; i++) { range.add(i); } // String[] inputs = line.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)"); String[] inputs = line.split(","); try { int year = Integer.parseInt(inputs[165]); if (range.contains(year)) { String dur = inputs[3]; String artist_name = inputs[2]; String song_title = inputs[1]; String final_input = artist_name + ',' + dur + ',' + song_title; Final_Value.set(final_input); output.collect(Final_Value, dummy); } } catch (NumberFormatException e) { // do nothing } }
/** * Returns a list of blocks that have timed out their replication requests. Returns null if no * blocks have timed out. */ Block[] getTimedOutBlocks() { synchronized (timedOutItems) { if (timedOutItems.size() <= 0) { return null; } Block[] blockList = timedOutItems.toArray(new Block[timedOutItems.size()]); timedOutItems.clear(); return blockList; } }
/** * Get the archive entries in classpath as an array of Path * * @param conf Configuration that contains the classpath setting */ public static Path[] getArchiveClassPaths(Configuration conf) { String classpath = conf.get("mapred.job.classpath.archives"); if (classpath == null) return null; ArrayList list = Collections.list(new StringTokenizer(classpath, System.getProperty("path.separator"))); Path[] paths = new Path[list.size()]; for (int i = 0; i < list.size(); i++) { paths[i] = new Path((String) list.get(i)); } return paths; }
private float mean(ArrayList<Integer> l) { int t = l.size(); Integer sum = new Integer(0); for (Integer i : l) { sum += i; } return ((float) sum) / t; }
private float standard_deviation(ArrayList<Integer> l) { int t = l.size(); float ans = 0, mn = this.mean(l); for (Integer i : l) { ans += (i - mn) * (i - mn); } return (float) Math.sqrt(ans / (t - 1)); }
public void reduce( IntWritable sameNum, Iterator<Text> data, OutputCollector<Text, jBLASArrayWritable> output, Reporter reporter) throws IOException { int totalBatchCount = exampleCount / batchSize; DoubleMatrix weights = DoubleMatrix.randn(hiddenNodes, visibleNodes); DoubleMatrix hbias = DoubleMatrix.zeros(hiddenNodes); DoubleMatrix vbias = DoubleMatrix.zeros(visibleNodes); DoubleMatrix label = DoubleMatrix.zeros(1); DoubleMatrix hidden_chain = null; DoubleMatrix vdata = DoubleMatrix.zeros(batchSize, visibleNodes); ArrayList<DoubleMatrix> outputmatricies = new ArrayList<DoubleMatrix>(); outputmatricies.add(weights); outputmatricies.add(hbias); outputmatricies.add(vbias); outputmatricies.add(label); outputmatricies.add(vdata); outputmatricies.add(hidden_chain); int j; for (int i = 0; i < totalBatchCount; i++) { j = 0; while (data.hasNext() && j < batchSize) { j++; StringTokenizer tk = new StringTokenizer(data.next().toString()); label.put(0, Double.parseDouble(tk.nextToken())); String image = tk.nextToken(); for (int k = 0; k < image.length(); k++) { Integer val = new Integer(image.charAt(k)); vdata.put(j, k, val.doubleValue()); } dataArray = new jBLASArrayWritable(outputmatricies); batchID.set("1\t" + i); output.collect(batchID, dataArray); } } }
/** * This method gets called everytime before any read/write to make sure that any change to * localDirs is reflected immediately. */ private synchronized void confChanged(Configuration conf) throws IOException { String newLocalDirs = conf.get(contextCfgItemName); if (!newLocalDirs.equals(savedLocalDirs)) { String[] localDirs = conf.getStrings(contextCfgItemName); localFS = FileSystem.getLocal(conf); int numDirs = localDirs.length; ArrayList<String> dirs = new ArrayList<String>(numDirs); ArrayList<DF> dfList = new ArrayList<DF>(numDirs); for (int i = 0; i < numDirs; i++) { try { // filter problematic directories Path tmpDir = new Path(localDirs[i]); if (localFS.mkdirs(tmpDir) || localFS.exists(tmpDir)) { try { DiskChecker.checkDir(new File(localDirs[i])); dirs.add(localDirs[i]); dfList.add(new DF(new File(localDirs[i]), 30000)); } catch (DiskErrorException de) { LOG.warn(localDirs[i] + "is not writable\n" + StringUtils.stringifyException(de)); } } else { LOG.warn("Failed to create " + localDirs[i]); } } catch (IOException ie) { LOG.warn( "Failed to create " + localDirs[i] + ": " + ie.getMessage() + "\n" + StringUtils.stringifyException(ie)); } // ignore } localDirsPath = new Path[dirs.size()]; for (int i = 0; i < localDirsPath.length; i++) { localDirsPath[i] = new Path(dirs.get(i)); } dirDF = dfList.toArray(new DF[dirs.size()]); savedLocalDirs = newLocalDirs; // randomize the first disk picked in the round-robin selection dirNumLastAccessed = dirIndexRandomizer.nextInt(dirs.size()); } }
public void reduce( IntWritable key, Iterator<Text> values, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { HashMap<String, Integer> countries_map = new HashMap<String, Integer>(); ArrayList<Integer> counties = new ArrayList<>(); String cp = new String(); while (values.hasNext()) { cp = values.next().toString(); if (countries_map.containsKey(cp)) { countries_map.put(cp, countries_map.get(cp) + 1); } else { countries_map.put(cp, 1); } } for (java.util.Map.Entry<String, Integer> entry : countries_map.entrySet()) { counties.add(entry.getValue()); } output.collect( key, new Text( "" + countries_map.entrySet().size() + " " + Collections.min(counties) + " " + median(counties) + " " + Collections.max(counties) + " " + mean(counties) + " " + standard_deviation(counties))); }
private int median(ArrayList<Integer> l) { Collections.sort(l); int t = l.size(); return l.get(t / 2); }