コード例 #1
0
    // specify input and out keys
    public void map(
        LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      String line = value.toString(); // define new variable to be string

      ArrayList<Integer> range = new ArrayList<Integer>();
      for (int i = 2000; i <= 2010; i++) {
        range.add(i);
      }

      // String[] inputs = line.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");
      String[] inputs = line.split(",");

      try {

        int year = Integer.parseInt(inputs[165]);

        if (range.contains(year)) {
          String dur = inputs[3];
          String artist_name = inputs[2];
          String song_title = inputs[1];
          String final_input = artist_name + ',' + dur + ',' + song_title;
          Final_Value.set(final_input);
          output.collect(Final_Value, dummy);
        }
      } catch (NumberFormatException e) {
        // do nothing
      }
    }
コード例 #2
0
 /**
  * Returns a list of blocks that have timed out their replication requests. Returns null if no
  * blocks have timed out.
  */
 Block[] getTimedOutBlocks() {
   synchronized (timedOutItems) {
     if (timedOutItems.size() <= 0) {
       return null;
     }
     Block[] blockList = timedOutItems.toArray(new Block[timedOutItems.size()]);
     timedOutItems.clear();
     return blockList;
   }
 }
コード例 #3
0
 /**
  * Get the archive entries in classpath as an array of Path
  *
  * @param conf Configuration that contains the classpath setting
  */
 public static Path[] getArchiveClassPaths(Configuration conf) {
   String classpath = conf.get("mapred.job.classpath.archives");
   if (classpath == null) return null;
   ArrayList list =
       Collections.list(new StringTokenizer(classpath, System.getProperty("path.separator")));
   Path[] paths = new Path[list.size()];
   for (int i = 0; i < list.size(); i++) {
     paths[i] = new Path((String) list.get(i));
   }
   return paths;
 }
コード例 #4
0
 private float mean(ArrayList<Integer> l) {
   int t = l.size();
   Integer sum = new Integer(0);
   for (Integer i : l) {
     sum += i;
   }
   return ((float) sum) / t;
 }
コード例 #5
0
    private float standard_deviation(ArrayList<Integer> l) {
      int t = l.size();
      float ans = 0, mn = this.mean(l);

      for (Integer i : l) {
        ans += (i - mn) * (i - mn);
      }
      return (float) Math.sqrt(ans / (t - 1));
    }
コード例 #6
0
    public void reduce(
        IntWritable sameNum,
        Iterator<Text> data,
        OutputCollector<Text, jBLASArrayWritable> output,
        Reporter reporter)
        throws IOException {
      int totalBatchCount = exampleCount / batchSize;

      DoubleMatrix weights = DoubleMatrix.randn(hiddenNodes, visibleNodes);
      DoubleMatrix hbias = DoubleMatrix.zeros(hiddenNodes);
      DoubleMatrix vbias = DoubleMatrix.zeros(visibleNodes);
      DoubleMatrix label = DoubleMatrix.zeros(1);
      DoubleMatrix hidden_chain = null;
      DoubleMatrix vdata = DoubleMatrix.zeros(batchSize, visibleNodes);

      ArrayList<DoubleMatrix> outputmatricies = new ArrayList<DoubleMatrix>();
      outputmatricies.add(weights);
      outputmatricies.add(hbias);
      outputmatricies.add(vbias);
      outputmatricies.add(label);
      outputmatricies.add(vdata);
      outputmatricies.add(hidden_chain);

      int j;
      for (int i = 0; i < totalBatchCount; i++) {
        j = 0;
        while (data.hasNext() && j < batchSize) {
          j++;
          StringTokenizer tk = new StringTokenizer(data.next().toString());
          label.put(0, Double.parseDouble(tk.nextToken()));
          String image = tk.nextToken();
          for (int k = 0; k < image.length(); k++) {
            Integer val = new Integer(image.charAt(k));
            vdata.put(j, k, val.doubleValue());
          }
          dataArray = new jBLASArrayWritable(outputmatricies);
          batchID.set("1\t" + i);
          output.collect(batchID, dataArray);
        }
      }
    }
コード例 #7
0
    /**
     * This method gets called everytime before any read/write to make sure that any change to
     * localDirs is reflected immediately.
     */
    private synchronized void confChanged(Configuration conf) throws IOException {
      String newLocalDirs = conf.get(contextCfgItemName);
      if (!newLocalDirs.equals(savedLocalDirs)) {
        String[] localDirs = conf.getStrings(contextCfgItemName);
        localFS = FileSystem.getLocal(conf);
        int numDirs = localDirs.length;
        ArrayList<String> dirs = new ArrayList<String>(numDirs);
        ArrayList<DF> dfList = new ArrayList<DF>(numDirs);
        for (int i = 0; i < numDirs; i++) {
          try {
            // filter problematic directories
            Path tmpDir = new Path(localDirs[i]);
            if (localFS.mkdirs(tmpDir) || localFS.exists(tmpDir)) {
              try {
                DiskChecker.checkDir(new File(localDirs[i]));
                dirs.add(localDirs[i]);
                dfList.add(new DF(new File(localDirs[i]), 30000));
              } catch (DiskErrorException de) {
                LOG.warn(localDirs[i] + "is not writable\n" + StringUtils.stringifyException(de));
              }
            } else {
              LOG.warn("Failed to create " + localDirs[i]);
            }
          } catch (IOException ie) {
            LOG.warn(
                "Failed to create "
                    + localDirs[i]
                    + ": "
                    + ie.getMessage()
                    + "\n"
                    + StringUtils.stringifyException(ie));
          } // ignore
        }
        localDirsPath = new Path[dirs.size()];
        for (int i = 0; i < localDirsPath.length; i++) {
          localDirsPath[i] = new Path(dirs.get(i));
        }
        dirDF = dfList.toArray(new DF[dirs.size()]);
        savedLocalDirs = newLocalDirs;

        // randomize the first disk picked in the round-robin selection
        dirNumLastAccessed = dirIndexRandomizer.nextInt(dirs.size());
      }
    }
コード例 #8
0
    public void reduce(
        IntWritable key,
        Iterator<Text> values,
        OutputCollector<IntWritable, Text> output,
        Reporter reporter)
        throws IOException {
      HashMap<String, Integer> countries_map = new HashMap<String, Integer>();
      ArrayList<Integer> counties = new ArrayList<>();
      String cp = new String();

      while (values.hasNext()) {
        cp = values.next().toString();
        if (countries_map.containsKey(cp)) {
          countries_map.put(cp, countries_map.get(cp) + 1);
        } else {
          countries_map.put(cp, 1);
        }
      }

      for (java.util.Map.Entry<String, Integer> entry : countries_map.entrySet()) {
        counties.add(entry.getValue());
      }
      output.collect(
          key,
          new Text(
              ""
                  + countries_map.entrySet().size()
                  + " "
                  + Collections.min(counties)
                  + " "
                  + median(counties)
                  + " "
                  + Collections.max(counties)
                  + " "
                  + mean(counties)
                  + " "
                  + standard_deviation(counties)));
    }
コード例 #9
0
 private int median(ArrayList<Integer> l) {
   Collections.sort(l);
   int t = l.size();
   return l.get(t / 2);
 }