コード例 #1
0
    // specify input and out keys
    public void map(
        LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      String line = value.toString(); // define new variable to be string

      ArrayList<Integer> range = new ArrayList<Integer>();
      for (int i = 2000; i <= 2010; i++) {
        range.add(i);
      }

      // String[] inputs = line.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");
      String[] inputs = line.split(",");

      try {

        int year = Integer.parseInt(inputs[165]);

        if (range.contains(year)) {
          String dur = inputs[3];
          String artist_name = inputs[2];
          String song_title = inputs[1];
          String final_input = artist_name + ',' + dur + ',' + song_title;
          Final_Value.set(final_input);
          output.collect(Final_Value, dummy);
        }
      } catch (NumberFormatException e) {
        // do nothing
      }
    }
コード例 #2
0
ファイル: NutchBean.java プロジェクト: albanm/nutchbase
 public static List<String> readConfig(Path path, Configuration conf) throws IOException {
   final FileSystem fs = FileSystem.get(conf);
   final BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(path)));
   try {
     final ArrayList<String> addrs = new ArrayList<String>();
     String line;
     while ((line = reader.readLine()) != null) {
       addrs.add(line);
     }
     return addrs;
   } finally {
     reader.close();
   }
 }
コード例 #3
0
ファイル: JobTracker.java プロジェクト: bruthe/hadoop-0.1r
  /**
   * A TaskTracker wants to know the physical locations of completed, but not yet closed, tasks.
   * This exists so the reduce task thread can locate map task outputs.
   */
  public synchronized MapOutputLocation[] locateMapOutputs(
      String taskId, String[][] mapTasksNeeded) {
    ArrayList v = new ArrayList();
    for (int i = 0; i < mapTasksNeeded.length; i++) {
      for (int j = 0; j < mapTasksNeeded[i].length; j++) {
        TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(mapTasksNeeded[i][j]);
        if (tip != null && tip.isComplete(mapTasksNeeded[i][j])) {
          String trackerId = (String) taskidToTrackerMap.get(mapTasksNeeded[i][j]);
          TaskTrackerStatus tracker;
          synchronized (taskTrackers) {
            tracker = (TaskTrackerStatus) taskTrackers.get(trackerId);
          }
          v.add(new MapOutputLocation(mapTasksNeeded[i][j], tracker.getHost(), tracker.getPort()));
          break;
        }
      }
    }
    // randomly shuffle results to load-balance map output requests
    Collections.shuffle(v);

    return (MapOutputLocation[]) v.toArray(new MapOutputLocation[v.size()]);
  }
コード例 #4
0
    public void reduce(
        IntWritable key,
        Iterator<Text> values,
        OutputCollector<IntWritable, Text> output,
        Reporter reporter)
        throws IOException {
      HashMap<String, Integer> countries_map = new HashMap<String, Integer>();
      ArrayList<Integer> counties = new ArrayList<>();
      String cp = new String();

      while (values.hasNext()) {
        cp = values.next().toString();
        if (countries_map.containsKey(cp)) {
          countries_map.put(cp, countries_map.get(cp) + 1);
        } else {
          countries_map.put(cp, 1);
        }
      }

      for (java.util.Map.Entry<String, Integer> entry : countries_map.entrySet()) {
        counties.add(entry.getValue());
      }
      output.collect(
          key,
          new Text(
              ""
                  + countries_map.entrySet().size()
                  + " "
                  + Collections.min(counties)
                  + " "
                  + median(counties)
                  + " "
                  + Collections.max(counties)
                  + " "
                  + mean(counties)
                  + " "
                  + standard_deviation(counties)));
    }