// specify input and out keys
    /**
     * Emits one record for every comma-separated input row whose year column falls in the
     * inclusive range 2000-2010.
     *
     * <p>The emitted key is {@code <field 2>,<field 3>,<field 1>} (artist name, duration, song
     * title according to the original variable names); the emitted value is the shared {@code
     * dummy} object. Rows that are too short to contain the year column, or whose year column is
     * not an integer, are skipped silently.
     *
     * <p>The row is split on every comma, so a quoted field that itself contains a comma shifts
     * the column positions.
     *
     * @param key input key; not used
     * @param value one line of comma-separated input
     * @param output collector receiving the emitted pairs
     * @param reporter progress reporter; not used
     * @throws IOException if the collector fails to accept a record
     */
    public void map(
        LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      final int songTitleColumn = 1;
      final int artistNameColumn = 2;
      final int durationColumn = 3;
      final int yearColumn = 165;
      final int firstYear = 2000;
      final int lastYear = 2010;

      String[] inputs = value.toString().split(",");

      // String.split drops trailing empty fields, so a row can legitimately be shorter than
      // expected. Skip it rather than failing the task with ArrayIndexOutOfBoundsException.
      if (inputs.length <= yearColumn) {
        return;
      }

      int year;
      try {
        year = Integer.parseInt(inputs[yearColumn]);
      } catch (NumberFormatException ignored) {
        // The year column is not numeric; such rows are deliberately dropped.
        return;
      }

      if (year < firstYear || year > lastYear) {
        return;
      }

      String final_input =
          inputs[artistNameColumn] + ',' + inputs[durationColumn] + ',' + inputs[songTitleColumn];
      Final_Value.set(final_input);
      output.collect(Final_Value, dummy);
    }
 /**
  * Returns the archive classpath entries stored in the configuration as an array of Path.
  *
  * <p>The value of {@code mapred.job.classpath.archives} is split on the system {@code
  * path.separator} and every token is wrapped in a Path, in order.
  *
  * @param conf Configuration that contains the classpath setting
  * @return one Path per classpath entry, or {@code null} when the property is not set
  */
 public static Path[] getArchiveClassPaths(Configuration conf) {
   String archives = conf.get("mapred.job.classpath.archives");
   if (archives == null) {
     return null;
   }
   StringTokenizer tokens = new StringTokenizer(archives, System.getProperty("path.separator"));
   // countTokens() reports the tokens still to be read, i.e. all of them at this point.
   Path[] result = new Path[tokens.countTokens()];
   for (int idx = 0; tokens.hasMoreTokens(); idx++) {
     result[idx] = new Path(tokens.nextToken());
   }
   return result;
 }
// Example #3
// 0
 /**
  * Reads the file at {@code path} and returns its lines in file order.
  *
  * <p>The reader is managed by try-with-resources, so it is always closed and a failure while
  * closing cannot hide an earlier read failure (it is attached as a suppressed exception
  * instead).
  *
  * <p>NOTE(review): the InputStreamReader is created without an explicit charset, so the
  * platform default encoding is used - confirm this is intended for the files being read.
  *
  * @param path location of the file to read
  * @param conf configuration used to obtain the FileSystem
  * @return every line of the file, without line terminators; empty if the file is empty
  * @throws IOException if the file cannot be opened or read
  */
 public static List<String> readConfig(Path path, Configuration conf) throws IOException {
   final FileSystem fs = FileSystem.get(conf);
   final List<String> addrs = new ArrayList<String>();
   try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(path)))) {
     String line;
     while ((line = reader.readLine()) != null) {
       addrs.add(line);
     }
   }
   return addrs;
 }
 /**
  * Computes the arithmetic mean of the given integers.
  *
  * <p>The sum is accumulated in a primitive {@code long}, so it neither overflows the int range
  * nor boxes a new Integer on every iteration.
  *
  * @param l values to average; must not contain {@code null} elements
  * @return the mean as a float; {@code NaN} for an empty list (0 divided by 0 in float
  *     arithmetic)
  */
 private float mean(ArrayList<Integer> l) {
   long sum = 0L;
   for (int value : l) {
     sum += value;
   }
   return ((float) sum) / l.size();
 }
    /**
     * Computes the sample standard deviation (divisor {@code n - 1}) of the given integers,
     * using {@code mean(l)} as the centre.
     *
     * <p>All arithmetic is carried out in float. A single-element list yields {@code NaN},
     * because the sum of squares and the divisor are both zero.
     *
     * @param l values to measure
     * @return the square root of the summed squared deviations divided by {@code l.size() - 1}
     */
    private float standard_deviation(ArrayList<Integer> l) {
      final int count = l.size();
      final float avg = this.mean(l);

      float squaredDiffs = 0;
      for (int idx = 0; idx < count; idx++) {
        float delta = l.get(idx) - avg;
        squaredDiffs += delta * delta;
      }

      float variance = squaredDiffs / (count - 1);
      return (float) Math.sqrt(variance);
    }
// Example #6
// 0
  /**
   * A TaskTracker asks where the outputs of completed, but not yet closed, map tasks physically
   * live, so that the reduce task thread can fetch them.
   *
   * <p>Each row of {@code mapTasksNeeded} is scanned in order and contributes at most one
   * location: the first task id in the row that has a known TaskInProgress reporting it as
   * complete. Rows with no such id contribute nothing. The collected locations are shuffled
   * before being returned.
   *
   * @param taskId id of the requesting task; not used by this method
   * @param mapTasksNeeded rows of candidate map task ids
   * @return the located map outputs, in random order
   */
  public synchronized MapOutputLocation[] locateMapOutputs(
      String taskId, String[][] mapTasksNeeded) {
    ArrayList<MapOutputLocation> locations = new ArrayList<MapOutputLocation>();

    for (String[] candidates : mapTasksNeeded) {
      for (String candidateId : candidates) {
        TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(candidateId);
        if (tip == null || !tip.isComplete(candidateId)) {
          continue;
        }

        String trackerId = (String) taskidToTrackerMap.get(candidateId);
        TaskTrackerStatus tracker;
        synchronized (taskTrackers) {
          tracker = (TaskTrackerStatus) taskTrackers.get(trackerId);
        }
        locations.add(new MapOutputLocation(candidateId, tracker.getHost(), tracker.getPort()));
        // Only the first completed candidate of each row is reported.
        break;
      }
    }

    // randomly shuffle results to load-balance map output requests
    Collections.shuffle(locations);

    return locations.toArray(new MapOutputLocation[locations.size()]);
  }
    /**
     * Summarises how often each distinct value occurs for the given key.
     *
     * <p>The values are tallied by their string form, and a single record is emitted for the
     * key. Its text holds six space-separated fields: the number of distinct values, then the
     * minimum, median, maximum, mean and standard deviation of the per-value occurrence counts.
     *
     * @param key key shared by all the values; emitted unchanged
     * @param values values grouped under the key
     * @param output collector receiving the summary record
     * @param reporter progress reporter; not used
     * @throws IOException if the collector fails to accept the record
     */
    public void reduce(
        IntWritable key,
        Iterator<Text> values,
        OutputCollector<IntWritable, Text> output,
        Reporter reporter)
        throws IOException {
      HashMap<String, Integer> occurrences = new HashMap<String, Integer>();
      while (values.hasNext()) {
        String item = values.next().toString();
        Integer seen = occurrences.get(item);
        occurrences.put(item, seen == null ? 1 : seen + 1);
      }

      ArrayList<Integer> counts = new ArrayList<Integer>(occurrences.values());

      // The statistics are appended strictly left to right; median() sorts the list in place
      // before the later calls run.
      StringBuilder summary = new StringBuilder();
      summary.append(occurrences.size());
      summary.append(" ").append(Collections.min(counts));
      summary.append(" ").append(median(counts));
      summary.append(" ").append(Collections.max(counts));
      summary.append(" ").append(mean(counts));
      summary.append(" ").append(standard_deviation(counts));

      output.collect(key, new Text(summary.toString()));
    }
 /**
  * Returns the middle element of the list once it is sorted in ascending order.
  *
  * <p>The list passed in is sorted in place, so the caller sees the reordered list afterwards.
  * For an even number of elements the upper of the two middle elements is returned.
  *
  * @param l values to inspect; reordered by this call
  * @return the element at index {@code l.size() / 2} of the sorted list
  */
 private int median(ArrayList<Integer> l) {
   Collections.sort(l);
   final int middle = l.size() / 2;
   Integer picked = l.get(middle);
   return picked;
 }