Example no. 1
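  /**
   * Reads the single reduce output file of the write or read test, extracts the accumulated
   * ":tasks", ":size", ":time", ":rate" and ":sqrate" statistics, derives the mean IO rate
   * and its standard deviation, and appends a human-readable summary to the log and to the
   * result file.
   */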
  private static void analyzeResult(FileSystem fs, int testType, long execTime, String resFileName)
      throws IOException {
    Path reduceFile;
    if (testType == TEST_TYPE_WRITE) reduceFile = new Path(WRITE_DIR, "part-00000");
    else reduceFile = new Path(READ_DIR, "part-00000");
    DataInputStream in = new DataInputStream(fs.open(reduceFile));
    BufferedReader lines = new BufferedReader(new InputStreamReader(in));
    long tasks = 0;
    long size = 0;
    long time = 0;
    float rate = 0;
    float sqrate = 0;
    String line;
    while ((line = lines.readLine()) != null) {
      StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%");
      String attr = tokens.nextToken();
      if (attr.endsWith(":tasks")) tasks = Long.parseLong(tokens.nextToken());
      else if (attr.endsWith(":size")) size = Long.parseLong(tokens.nextToken());
      else if (attr.endsWith(":time")) time = Long.parseLong(tokens.nextToken());
      else if (attr.endsWith(":rate")) rate = Float.parseFloat(tokens.nextToken());
      else if (attr.endsWith(":sqrate")) sqrate = Float.parseFloat(tokens.nextToken());
    }
    lines.close();

    // collectStats() reports each task's IO rate and squared rate multiplied by 1000, so
    // dividing the summed values by 1000 and by the task count yields the mean rate; the
    // standard deviation follows from E[x^2] - E[x]^2.
    double med = rate / 1000 / tasks;
    double stdDev = Math.sqrt(Math.abs(sqrate / 1000 / tasks - med * med));
    String[] resultLines = {
      "----- DFSCIOTest ----- : "
          + ((testType == TEST_TYPE_WRITE)
              ? "write"
              : (testType == TEST_TYPE_READ) ? "read" : "unknown"),
      "           Date & time: " + new Date(System.currentTimeMillis()),
      "       Number of files: " + tasks,
      "Total MBytes processed: " + size / MEGA,
      "     Throughput mb/sec: " + size * 1000.0 / (time * MEGA),
      "Average IO rate mb/sec: " + med,
      " Std IO rate deviation: " + stdDev,
      "    Test exec time sec: " + (float) execTime / 1000,
      ""
    };

    PrintStream res = new PrintStream(new FileOutputStream(new File(resFileName), true));
    for (int i = 0; i < resultLines.length; i++) {
      LOG.info(resultLines[i]);
      res.println(resultLines[i]);
    }
    res.close();
  }
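
  /**
   * Writes the control SequenceFile (CONTROL_DIR/files) of (file name, file size) records:
   * names are random longs (reused by the mappers below as per-file random seeds) and sizes
   * are random, bounded by roughly twice the average file size, until the requested total
   * is reached.
   */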
  public static void createControlFile(FileSystem fs, long megaBytes, int numFiles, long seed)
      throws Exception {

    LOG.info("creating control file: " + megaBytes + " bytes, " + numFiles + " files");

    Path controlFile = new Path(CONTROL_DIR, "files");
    fs.delete(controlFile, true);
    Random random = new Random(seed);

    SequenceFile.Writer writer =
        SequenceFile.createWriter(
            fs, conf, controlFile, Text.class, LongWritable.class, CompressionType.NONE);

    long totalSize = 0;
    long maxSize = ((megaBytes / numFiles) * 2) + 1;
    try {
      while (totalSize < megaBytes) {
        Text name = new Text(Long.toString(random.nextLong()));

        long size = random.nextLong();
        if (size < 0) size = -size;
        size = size % maxSize;

        // LOG.info(" adding: name="+name+" size="+size);

        writer.append(name, new LongWritable(size));

        totalSize += size;
      }
    } finally {
      writer.close();
    }
    LOG.info("created control file for: " + totalSize + " bytes");
  }
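
For illustration only, the control records written above can be read back with the corresponding SequenceFile.Reader API. This is a minimal sketch, not part of the original test: dumpControlFile is a hypothetical helper and it assumes the same static fs, conf, CONTROL_DIR and LOG members used by createControlFile.

  private static void dumpControlFile(FileSystem fs) throws IOException {
    // Hypothetical helper (sketch): list the (name, size) pairs produced by createControlFile.
    Path controlFile = new Path(CONTROL_DIR, "files");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, controlFile, conf);
    try {
      Text name = new Text();
      LongWritable size = new LongWritable();
      while (reader.next(name, size)) {
        LOG.info("name=" + name + " size=" + size.get());
      }
    } finally {
      reader.close();
    }
  }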
Example no. 3
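 /** Sums the long values collected for a key and emits the total as text. */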
 @Override
 public void reduce(
     Text key, Iterator<Text> iterator, OutputCollector<Text, Text> output, Reporter reporter)
     throws IOException {
   long sum = 0;
   while (iterator.hasNext()) {
     sum = sum + Long.parseLong(iterator.next().toString());
   }
   output.collect(key, new Text(String.valueOf(sum)));
 }
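
    /**
     * Read mapper: re-opens DATA_DIR/name, reads it back in buffer-sized chunks, regenerates
     * the expected bytes from the seed encoded in the file name (a single fill byte per chunk
     * when fastCheck is set, random bytes otherwise), asserts that every chunk matches, and
     * reports the number of bytes read under the "bytes" key.
     */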
    public void map(
        Text key,
        LongWritable value,
        OutputCollector<Text, LongWritable> collector,
        Reporter reporter)
        throws IOException {

      String name = key.toString();
      long size = value.get();
      long seed = Long.parseLong(name);

      random.setSeed(seed);
      reporter.setStatus("opening " + name);

      DataInputStream in = new DataInputStream(fs.open(new Path(DATA_DIR, name)));

      long read = 0;
      try {
        while (read < size) {
          long remains = size - read;
          int n = (remains <= buffer.length) ? (int) remains : buffer.length;
          in.readFully(buffer, 0, n);
          read += n;
          if (fastCheck) {
            Arrays.fill(check, (byte) random.nextInt(Byte.MAX_VALUE));
          } else {
            random.nextBytes(check);
          }
          if (n != buffer.length) {
            Arrays.fill(buffer, n, buffer.length, (byte) 0);
            Arrays.fill(check, n, check.length, (byte) 0);
          }
          assertTrue(Arrays.equals(buffer, check));

          reporter.setStatus("reading " + name + "@" + read + "/" + size);
        }
      } finally {
        in.close();
      }

      collector.collect(new Text("bytes"), new LongWritable(read));

      reporter.setStatus("read " + name);
    }
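
    /**
     * Seek mapper: performs SEEKS_PER_FILE random seeks into DATA_DIR/name, reads one byte at
     * each position, replays the seeded random sequence up to that position to recompute the
     * expected byte, and asserts that the two match. Empty files are skipped.
     */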
    public void map(
        Text key, LongWritable value, OutputCollector<K, LongWritable> collector, Reporter reporter)
        throws IOException {
      String name = key.toString();
      long size = value.get();
      long seed = Long.parseLong(name);

      if (size == 0) return;

      reporter.setStatus("opening " + name);

      FSDataInputStream in = fs.open(new Path(DATA_DIR, name));

      try {
        for (int i = 0; i < SEEKS_PER_FILE; i++) {
          // generate a random position
          long position = Math.abs(random.nextLong()) % size;

          // seek file to that position
          reporter.setStatus("seeking " + name);
          in.seek(position);
          byte b = in.readByte();

          // check that byte matches
          byte checkByte = 0;
          // advance random state to that position
          random.setSeed(seed);
          for (int p = 0; p <= position; p += check.length) {
            reporter.setStatus("generating data for " + name);
            if (fastCheck) {
              checkByte = (byte) random.nextInt(Byte.MAX_VALUE);
            } else {
              random.nextBytes(check);
              checkByte = check[(int) (position % check.length)];
            }
          }
          assertEquals(b, checkByte);
        }
      } finally {
        in.close();
      }
    }
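
    /**
     * Write mapper: generates size bytes from the per-file seed (a repeated fill byte when
     * fastCheck is set, random bytes otherwise), writes them to a temporary file so mappers
     * can run in parallel, renames it to DATA_DIR/name, and reports the byte count under the
     * "bytes" key.
     */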
    public void map(
        Text key,
        LongWritable value,
        OutputCollector<Text, LongWritable> collector,
        Reporter reporter)
        throws IOException {

      String name = key.toString();
      long size = value.get();
      long seed = Long.parseLong(name);

      random.setSeed(seed);
      reporter.setStatus("creating " + name);

      // write to temp file initially to permit parallel execution
      Path tempFile = new Path(DATA_DIR, name + suffix);
      OutputStream out = fs.create(tempFile);

      long written = 0;
      try {
        while (written < size) {
          if (fastCheck) {
            Arrays.fill(buffer, (byte) random.nextInt(Byte.MAX_VALUE));
          } else {
            random.nextBytes(buffer);
          }
          long remains = size - written;
          int length = (remains <= buffer.length) ? (int) remains : buffer.length;
          out.write(buffer, 0, length);
          written += length;
          reporter.setStatus("writing " + name + "@" + written + "/" + size);
        }
      } finally {
        out.close();
      }
      // rename to final location
      fs.rename(tempFile, new Path(DATA_DIR, name));

      collector.collect(new Text("bytes"), new LongWritable(written));

      reporter.setStatus("wrote " + name);
    }
Example no. 7
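    /**
     * Emits the per-task statistics for the reducer to accumulate: a task count of 1, the
     * number of bytes processed, the elapsed time, and the IO rate and squared IO rate
     * (both scaled by 1000; analyzeResult() divides that factor back out when it computes
     * the mean and standard deviation).
     */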
    void collectStats(OutputCollector<Text, Text> output, String name, long execTime, Long objSize)
        throws IOException {
      long totalSize = objSize.longValue();
      float ioRateMbSec = (float) totalSize * 1000 / (execTime * MEGA);
      LOG.info("Number of bytes processed = " + totalSize);
      LOG.info("Exec time = " + execTime);
      LOG.info("IO rate = " + ioRateMbSec);

      output.collect(
          new Text(AccumulatingReducer.VALUE_TYPE_LONG + "tasks"), new Text(String.valueOf(1)));
      output.collect(
          new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size"),
          new Text(String.valueOf(totalSize)));
      output.collect(
          new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time"),
          new Text(String.valueOf(execTime)));
      output.collect(
          new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate"),
          new Text(String.valueOf(ioRateMbSec * 1000)));
      output.collect(
          new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate"),
          new Text(String.valueOf(ioRateMbSec * ioRateMbSec * 1000)));
    }
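
    /**
     * Clustering mapper: parses a "movieId:userId_rating,userId_rating,..." line, rewrites the
     * user ids as consecutive attribute indices, accumulates the squared rating difference
     * against each cluster centroid, assigns the movie to the cluster with the highest
     * (250 - squared-difference) similarity and emits it under that cluster id.
     */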
    public void map(
        LongWritable key,
        Text value,
        OutputCollector<IntWritable, ClusterWritable> output,
        Reporter reporter)
        throws IOException {

      String movieIdStr;
      String reviewStr;
      String userIdStr;
      String reviews;
      String line;
      String tok;
      long movieId;
      int review, userId, p, q, r, rater, rating, movieIndex;
      int clusterId = 0;
      int[] n = new int[maxClusters];
      float[] sq_a = new float[maxClusters];
      float[] sq_b = new float[maxClusters];
      float[] numer = new float[maxClusters];
      float[] denom = new float[maxClusters];
      float max_similarity = 0.0f;
      float similarity = 0.0f;
      Cluster movie = new Cluster();
      ClusterWritable movies_arrl = new ClusterWritable();

      StringBuffer sb = new StringBuffer();

      line = value.toString();
      movieIndex = line.indexOf(":");

      for (r = 0; r < maxClusters; r++) {
        numer[r] = 0.0f;
        denom[r] = 0.0f;
        sq_a[r] = 0.0f;
        sq_b[r] = 0.0f;
        n[r] = 0;
      }
      if (movieIndex > 0) {
        movieIdStr = line.substring(0, movieIndex);
        sb.append(movieIdStr).append(":");

        movieId = Long.parseLong(movieIdStr);
        movie.movie_id = movieId;
        reviews = line.substring(movieIndex + 1);
        StringTokenizer token = new StringTokenizer(reviews, ",");

        int attrCnt = 0;
        // while (token.hasMoreTokens()) { Leo
        while (token.hasMoreTokens() && attrCnt < attrNum) {
          tok = token.nextToken();
          int reviewIndex = tok.indexOf("_");
          // userIdStr = tok.substring(0, reviewIndex); //Leo
          userIdStr = String.valueOf(attrCnt);
          reviewStr = tok.substring(reviewIndex + 1);
          if (attrCnt > 0) {
            sb.append(",");
          }
          sb.append(String.valueOf(attrCnt)).append("_").append(reviewStr);
          userId = Integer.parseInt(userIdStr);
          review = Integer.parseInt(reviewStr);
          for (r = 0; r < totalClusters; r++) {
            /*for (q = 0; q < centroids_ref[r].total; q++) {
                rater = centroids_ref[r].reviews.get(q).rater_id;
                rating = (int) centroids_ref[r].reviews.get(q).rating;
                if (userId == rater) {
                    numer[r] += (float) (review * rating);
                    sq_a[r] += (float) (review * review);
                    sq_b[r] += (float) (rating * rating);
                    n[r]++; // counter
                    break; // to avoid multiple ratings by the same reviewer
                }
            }*/
            // Leo
            rating = (int) centroids_ref[r].reviews.get(attrCnt).rating;
            numer[r] += (float) ((review - rating) * (review - rating));

            n[r]++; // counter
          }
          attrCnt++;
        }
        for (p = 0; p < totalClusters; p++) {
          /*denom[p] = (float) ((Math.sqrt((double) sq_a[p])) * (Math
                  .sqrt((double) sq_b[p])));
          if (denom[p] > 0) {
              similarity = numer[p] / denom[p];
              if (similarity > max_similarity) {
                  max_similarity = similarity;
                  clusterId = p;
              }
          }*/
          // Leo
          similarity = 250 - numer[p];
          if (similarity > max_similarity) {
            max_similarity = similarity;
            clusterId = p;
          }
        }

        // movies_arrl.movies.add(line);//Leo
        movies_arrl.movies.add(sb.toString());
        movies_arrl.similarities.add(max_similarity);
        movies_arrl.similarity = max_similarity;
        output.collect(new IntWritable(clusterId), movies_arrl);
        reporter.incrCounter(Counter.WORDS, 1);
      }
    }