コード例 #1
1
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String cur_file =
       ((FileSplit) context.getInputSplit()).getPath().getParent().getParent().getName();
   String train_file = context.getConfiguration().get("train_file");
   if (cur_file.equals(train_file)) {
     StringTokenizer st = new StringTokenizer(value.toString());
     String word = st.nextToken();
     String f_id = st.nextToken();
     myKey.set(word);
     myVal.set(f_id);
     context.write(myKey, myVal);
   } else {
     StringTokenizer st = new StringTokenizer(value.toString());
     String word = st.nextToken();
     String f_id = st.nextToken();
     StringBuilder builder = new StringBuilder(dlt);
     while (st.hasMoreTokens()) {
       String filename = st.nextToken();
       String tf_idf = st.nextToken();
       builder.append(filename);
       builder.append(dlt);
       builder.append(tf_idf);
       builder.append("\t");
     }
     myKey.set(word);
     myVal.set(builder.toString());
     context.write(myKey, myVal);
   }
 }
コード例 #2
0
ファイル: KMeansDriver.java プロジェクト: ktzoumas/flink-perf
 public static void createCentersSequenceFile(
     Configuration conf, FileSystem fs, String centroidsPath, String sequenceFilePath)
     throws Exception {
   Path seqFile = new Path(sequenceFilePath);
   if (fs.exists(seqFile)) {
     fs.delete(seqFile, true);
   }
   FSDataInputStream inputStream = fs.open(new Path(centroidsPath));
   SequenceFile.Writer writer =
       SequenceFile.createWriter(fs, conf, seqFile, Centroid.class, IntWritable.class);
   IntWritable value = new IntWritable(0);
   while (inputStream.available() > 0) {
     String line = inputStream.readLine();
     StringTokenizer tokenizer = new StringTokenizer(line, " ");
     int dim = tokenizer.countTokens() - 1;
     int clusterId = Integer.valueOf(tokenizer.nextToken());
     double[] coords = new double[dim];
     for (int i = 0; i < dim; i++) {
       coords[i] = Double.valueOf(tokenizer.nextToken());
     }
     Centroid cluster = new Centroid(clusterId, new Point(coords));
     writer.append(cluster, value);
   }
   IOUtils.closeStream(writer);
   inputStream.close();
 }
コード例 #3
0
ファイル: KMeansDriver.java プロジェクト: ktzoumas/flink-perf
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String line = value.toString();
      StringTokenizer tokenizer = new StringTokenizer(line, " ");
      int dim = tokenizer.countTokens();
      double[] coords = new double[dim];
      for (int i = 0; i < dim; i++) {
        coords[i] = Double.valueOf(tokenizer.nextToken());
      }
      Point point = new Point(coords);

      Centroid nearest = null;
      double nearestDistance = Double.MAX_VALUE;
      for (Centroid c : centers) {
        double dist = point.euclideanDistance(c);
        if (nearest == null) {
          nearest = c;
          nearestDistance = dist;
        } else {
          if (dist < nearestDistance) {
            nearest = c;
            nearestDistance = dist;
          }
        }
      }
      context.write(nearest, point);
    }
コード例 #4
0
    public void map(
        LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {

      String line = value.toString();
      String lastToken = null;
      StringTokenizer s = new StringTokenizer(line, "\t");
      String year = s.nextToken();

      while (s.hasMoreTokens()) {
        lastToken = s.nextToken();
      }

      int averagePrice = Integer.parseInt(lastToken);
      output.collect(new Text(year), new IntWritable(averagePrice));
    }
コード例 #5
0
ファイル: Fill.java プロジェクト: uzielgl/hadoop
    @Override
    public void map(
        LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

      String line = value.toString();
      StringTokenizer tokens = new StringTokenizer(line);

      String[] keys = tokens.nextToken().toString().split("-");
      String date_temp = tokens.nextToken();

      String country = keys[0];
      String year = keys[1];

      // Mandamos año y un iterable de [ciudad,mes,temperatura,fecha]
      output.collect(new Text(year), new Text(country + "," + date_temp));
    }
コード例 #6
0
 public void map(LongWritable key, Text value, OutputCollector output, Reporter reporter)
     throws IOException {
   String line = value.toString();
   StringTokenizer tokenizer = new StringTokenizer(line);
   while (tokenizer.hasMoreTokens()) {
     word.set(tokenizer.nextToken());
     output.collect(word, one);
   }
 }
コード例 #7
0
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String line = value.toString();
   StringTokenizer tokenizer = new StringTokenizer(line);
   while (tokenizer.hasMoreTokens()) {
     word.set(tokenizer.nextToken());
     context.write(word, one);
   }
 }
コード例 #8
0
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   Text word = new Text();
   StringTokenizer s = new StringTokenizer(value.toString());
   while (s.hasMoreTokens()) {
     word.set(s.nextToken());
     context.write(word, one);
   }
 }
コード例 #9
0
 public void map(
     IntWritable key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
     throws IOException {
   String dataRow = value.toString();
   StringTokenizer tk = new StringTokenizer(dataRow);
   String label = tk.nextToken();
   String image = tk.nextToken();
   dataString.set(label + "\t" + image);
   output.collect(sameKey, dataString);
 }
コード例 #10
0
ファイル: HW2_Q4.java プロジェクト: pscuderi/academic-samples
  public static List<Double> GetPoint(String s) {
    StringTokenizer tokenizer = new StringTokenizer(s);

    List<Double> p = new ArrayList<Double>(58);

    while (tokenizer.hasMoreTokens()) {
      p.add(Double.parseDouble(tokenizer.nextToken()));
    }

    return p;
  }
コード例 #11
0
 public void reduce(
     Text key,
     Iterable<Text> values,
     org.apache.hadoop.mapreduce.Reducer<Text, Text, Text, InvertedListWritable>.Context context)
     throws IOException, InterruptedException {
   InvertedListWritable invertedList = new InvertedListWritable();
   for (Text k : values) {
     StringTokenizer itr = new StringTokenizer(k.toString());
     url = "";
     abs = "";
     if (itr.hasMoreTokens()) url = itr.nextToken();
     while (itr.hasMoreTokens()) {
       abs += itr.nextToken() + " ";
     }
     dr = new ListURL(url, abs);
     invertedList.paddingValueKey(dr);
   }
   invertedList.quickSortNodeKey();
   context.write(key, invertedList);
 }
コード例 #12
0
    public void map(
        LongWritable key,
        Text value,
        OutputCollector<IntWritable, DoubleWritable> output,
        Reporter reporter)
        throws IOException {
      String line = value.toString();
      StringTokenizer tokenizer = new StringTokenizer(line);

      int rowIdx = 0;
      double xValue = 0;

      if (tokenizer.hasMoreTokens()) {
        rowIdx = Integer.parseInt(tokenizer.nextToken());
        xValue = Double.parseDouble(tokenizer.nextToken());
      }

      double xResult = (resVec[rowIdx] - (sumVec[rowIdx] * xValue)) / diaVec[rowIdx];
      output.collect(new IntWritable(rowIdx), new DoubleWritable(xResult));
    }
コード例 #13
0
    public void reduce(
        IntWritable sameNum,
        Iterator<Text> data,
        OutputCollector<Text, jBLASArrayWritable> output,
        Reporter reporter)
        throws IOException {
      int totalBatchCount = exampleCount / batchSize;

      DoubleMatrix weights = DoubleMatrix.randn(hiddenNodes, visibleNodes);
      DoubleMatrix hbias = DoubleMatrix.zeros(hiddenNodes);
      DoubleMatrix vbias = DoubleMatrix.zeros(visibleNodes);
      DoubleMatrix label = DoubleMatrix.zeros(1);
      DoubleMatrix hidden_chain = null;
      DoubleMatrix vdata = DoubleMatrix.zeros(batchSize, visibleNodes);

      ArrayList<DoubleMatrix> outputmatricies = new ArrayList<DoubleMatrix>();
      outputmatricies.add(weights);
      outputmatricies.add(hbias);
      outputmatricies.add(vbias);
      outputmatricies.add(label);
      outputmatricies.add(vdata);
      outputmatricies.add(hidden_chain);

      int j;
      for (int i = 0; i < totalBatchCount; i++) {
        j = 0;
        while (data.hasNext() && j < batchSize) {
          j++;
          StringTokenizer tk = new StringTokenizer(data.next().toString());
          label.put(0, Double.parseDouble(tk.nextToken()));
          String image = tk.nextToken();
          for (int k = 0; k < image.length(); k++) {
            Integer val = new Integer(image.charAt(k));
            vdata.put(j, k, val.doubleValue());
          }
          dataArray = new jBLASArrayWritable(outputmatricies);
          batchID.set("1\t" + i);
          output.collect(batchID, dataArray);
        }
      }
    }
コード例 #14
0
ファイル: DistCp.java プロジェクト: neutronsharc/hdfsbackup
  /**
   * Make a path relative with respect to a root path. absPath is always assumed to descend from
   * root. Otherwise returned path is null.
   */
  static String makeRelative(Path root, Path absPath) {
    if (!absPath.isAbsolute()) {
      throw new IllegalArgumentException("!absPath.isAbsolute(), absPath=" + absPath);
    }
    String p = absPath.toUri().getPath();

    StringTokenizer pathTokens = new StringTokenizer(p, "/");
    for (StringTokenizer rootTokens = new StringTokenizer(root.toUri().getPath(), "/");
        rootTokens.hasMoreTokens(); ) {
      if (!rootTokens.nextToken().equals(pathTokens.nextToken())) {
        return null;
      }
    }
    StringBuilder sb = new StringBuilder();
    for (; pathTokens.hasMoreTokens(); ) {
      sb.append(pathTokens.nextToken());
      if (pathTokens.hasMoreTokens()) {
        sb.append(Path.SEPARATOR);
      }
    }
    return sb.length() == 0 ? "." : sb.toString();
  }
コード例 #15
0
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

      String[] pair = new String[2];
      int count = 0;
      for (Text txt : values) {
        pair[count] = txt.toString();
        count++;
      }

      // word exists in training
      if (count == 2) {
        StringTokenizer st_one, st_two;
        if (pair[0].contains(dlt)) {
          st_one = new StringTokenizer(pair[1]);
          st_two = new StringTokenizer(pair[0]);
        } else {
          st_one = new StringTokenizer(pair[0]);
          st_two = new StringTokenizer(pair[1]);
        }

        // outputting the data
        String f_id = st_one.nextToken();

        StringBuilder builder = new StringBuilder(dlt);
        builder.append(f_id);
        builder.append(dlt);
        while (st_two.hasMoreTokens()) {
          String filename = st_two.nextToken();
          String tf_idf = st_two.nextToken();
          builder.append(filename);
          builder.append(dlt);
          builder.append(tf_idf);
          builder.append("\t");
        }
        myVal.set(builder.toString());
        context.write(key, myVal);
      }
    }
コード例 #16
0
ファイル: KMeansDriver.java プロジェクト: ktzoumas/flink-perf
 @Override
 protected void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String line = value.toString();
   StringTokenizer tokenizer = new StringTokenizer(line, " ");
   int dim = tokenizer.countTokens();
   double[] coords = new double[dim];
   for (int i = 0; i < dim; i++) {
     coords[i] = Double.valueOf(tokenizer.nextToken());
   }
   Centroid center = centers.get(rand.nextInt(centers.size()));
   Point point = new Point(coords);
   context.write(center, point);
 }
コード例 #17
0
    public void configure(JobConf conf) {
      try {
        Path vInput;
        FileSystem fs;
        URI[] fvector;
        nsize = conf.getInt("DIMENTION", 0);
        sumVec = new double[nsize];
        resVec = new double[nsize];
        diaVec = new double[nsize];
        Arrays.fill(sumVec, 0);
        Arrays.fill(resVec, 0);
        Arrays.fill(diaVec, 0);

        fvector = DistributedCache.getCacheFiles(conf);
        vInput = new Path(fvector[0].getPath());
        fs = FileSystem.get(URI.create("hdfs://node17.cs.rochester.edu:9000"), conf);

        FSDataInputStream fdis = fs.open(vInput);

        String line;
        while ((line = fdis.readLine()) != null) {
          StringTokenizer tokenizer = new StringTokenizer(line);
          int rowIdx = Integer.parseInt(tokenizer.nextToken());
          int colIdx = Integer.parseInt(tokenizer.nextToken());
          double matVar = Double.parseDouble(tokenizer.nextToken());
          if (rowIdx == colIdx) {
            diaVec[rowIdx] = matVar;
          } else if (colIdx == nsize) {
            resVec[rowIdx] = matVar;
          } else {
            sumVec[rowIdx] += matVar;
          }
        }
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
コード例 #18
0
    /** Implements the map-method of the Mapper-interface */
    @Override
    public void map(
        LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {

      String line = value.toString();
      StringTokenizer tokenizer = new StringTokenizer(line);

      while (tokenizer.hasMoreTokens()) {

        String token = tokenizer.nextToken();

        if (token.length() > 3) {
          word.set(token);
          output.collect(word, one);
        }
      }
    }
コード例 #19
0
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Path matFile = new Path(args[0]);
    FSDataInputStream matData = fs.open(matFile);
    BufferedReader br = new BufferedReader(new InputStreamReader(matData));

    int i = 0;
    String line;
    while ((line = br.readLine()) != null) {
      StringTokenizer tokenizer = new StringTokenizer(line);
      String iRow = tokenizer.nextToken();
      String iCol = tokenizer.nextToken();
      if (Integer.parseInt(iRow) == Integer.parseInt(iCol)) {
        i++;
      }
    }
    br.close();
    int dimention = i;

    conf.setInt("DIMENTION", dimention);
    Path xFile = new Path("preX/Result");
    FSDataOutputStream xData = fs.create(xFile);
    BufferedWriter iniX = new BufferedWriter(new OutputStreamWriter(xData));
    for (int j = 0; j < dimention; j++) {
      iniX.write(String.valueOf(j) + " 0");
      iniX.newLine();
    }
    iniX.close();

    URI matVec = new URI(args[0]);
    DistributedCache.addCacheFile(matVec, conf);

    int iteration = 0;
    do {
      ToolRunner.run(conf, new Jacobi(), args);
    } while (iteration++ < max_iter && (!stopIteration(conf)));
  }
コード例 #20
0
    public void map(
        Text key,
        Text val,
        org.apache.hadoop.mapreduce.Mapper<Text, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
      int i = 0, n = 0, j = 0, lj = 0, hj = 0;
      String tem = "";

      initStopWordsMap(); // initialize  the stop list
      String line = val.toString();
      StringTokenizer itr =
          new StringTokenizer(line.toLowerCase(), tokenDelimiter); // set delimiter
      n = itr.countTokens();
      cache = new String[n];
      for (i = 0; i < n; i++) {
        cache[i] = new String(""); // initialize the cache
      }
      i = 0;
      while (itr.hasMoreTokens()) {
        cache[i] = itr.nextToken(); // padding the cache with the words of the content
        i++;
      }
      for (i = 0; i < n; i++) {
        keyWord = cache[i];
        keyWord = keyWord.trim();
        if (!hmStopWord.containsKey(keyWord)) {
          lj = i - 10;
          hj = i + 10;
          if (lj < 0) lj = 0;
          if (hj > n) hj = n;
          tem = " ";
          for (j = lj; j < hj; j++) tem += cache[j] + " ";
          location = new Text();
          location.set(key.toString() + tem);
          context.write(new Text(keyWord), location);
        }
      }
    }
コード例 #21
0
ファイル: HW2_Q4.java プロジェクト: pscuderi/academic-samples
    protected void setup(Context context) throws IOException, InterruptedException {
      FileSystem fs = FileSystem.get(context.getConfiguration());

      Path cFile = new Path(context.getConfiguration().get("CFILE"));
      DataInputStream d = new DataInputStream(fs.open(cFile));
      BufferedReader reader = new BufferedReader(new InputStreamReader(d));

      String line;
      while ((line = reader.readLine()) != null) {
        StringTokenizer tokenizer = new StringTokenizer(line.toString());

        if (tokenizer.hasMoreTokens()) {
          List<Double> centroid = new ArrayList<Double>(58);

          while (tokenizer.hasMoreTokens()) {
            centroid.add(Double.parseDouble(tokenizer.nextToken()));
          }

          centroids.add(centroid);
        }
      }

      k = centroids.size();
    }