/**
 * Reads the reducer output of a finished test run, derives summary statistics from it, logs
 * them, and appends them to a local results file.
 *
 * <p>The reducer output is read from {@code part-00000} under {@code WRITE_DIR} when
 * {@code testType == TEST_TYPE_WRITE}, otherwise under {@code READ_DIR}. Each line is split on
 * whitespace and {@code '%'}; the first token selects the attribute by suffix
 * ({@code :tasks}, {@code :size}, {@code :time}, {@code :rate}, {@code :sqrate}) and the second
 * token is parsed as its value. Lines with any other attribute are ignored.
 *
 * @param fs file system holding the reducer output
 * @param testType test type constant; selects the input directory and the heading text
 * @param execTime total execution time in milliseconds (reported divided by 1000)
 * @param resFileName local file the summary is appended to
 * @throws IOException if the reducer output cannot be read or the results file cannot be opened
 */
private static void analyzeResult(FileSystem fs, int testType, long execTime, String resFileName)
    throws IOException {
  Path reduceFile;
  if (testType == TEST_TYPE_WRITE) {
    reduceFile = new Path(WRITE_DIR, "part-00000");
  } else {
    reduceFile = new Path(READ_DIR, "part-00000");
  }

  long tasks = 0;
  long size = 0;
  long time = 0;
  float rate = 0;
  float sqrate = 0;

  DataInputStream in = new DataInputStream(fs.open(reduceFile));
  BufferedReader lines = new BufferedReader(new InputStreamReader(in));
  try {
    String line;
    while ((line = lines.readLine()) != null) {
      StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%");
      String attr = tokens.nextToken();
      if (attr.endsWith(":tasks")) {
        tasks = Long.parseLong(tokens.nextToken());
      } else if (attr.endsWith(":size")) {
        size = Long.parseLong(tokens.nextToken());
      } else if (attr.endsWith(":time")) {
        time = Long.parseLong(tokens.nextToken());
      } else if (attr.endsWith(":rate")) {
        rate = Float.parseFloat(tokens.nextToken());
      } else if (attr.endsWith(":sqrate")) {
        sqrate = Float.parseFloat(tokens.nextToken());
      }
    }
  } finally {
    // Closing the reader also closes the wrapped DFS input stream, which was previously leaked.
    lines.close();
  }

  // rate and sqrate are divided by 1000 before averaging over the number of tasks.
  double med = rate / 1000 / tasks;
  // Math.abs guards against a slightly negative variance caused by floating-point rounding.
  double stdDev = Math.sqrt(Math.abs(sqrate / 1000 / tasks - med * med));

  String[] resultLines = {
    "----- DFSCIOTest ----- : "
        + ((testType == TEST_TYPE_WRITE)
            ? "write"
            : (testType == TEST_TYPE_READ) ? "read" : "unknown"),
    " Date & time: " + new Date(System.currentTimeMillis()),
    " Number of files: " + tasks,
    "Total MBytes processed: " + size / MEGA,
    " Throughput mb/sec: " + size * 1000.0 / (time * MEGA),
    "Average IO rate mb/sec: " + med,
    " Std IO rate deviation: " + stdDev,
    " Test exec time sec: " + (float) execTime / 1000,
    ""
  };

  // Opened in append mode so that successive runs accumulate in the same file.
  PrintStream res = new PrintStream(new FileOutputStream(new File(resFileName), true));
  try {
    for (String resultLine : resultLines) {
      LOG.info(resultLine);
      res.println(resultLine);
    }
  } finally {
    // Flushes and releases the file handle, which was previously never closed.
    res.close();
  }
}
public static void createControlFile(FileSystem fs, long megaBytes, int numFiles, long seed) throws Exception { LOG.info("creating control file: " + megaBytes + " bytes, " + numFiles + " files"); Path controlFile = new Path(CONTROL_DIR, "files"); fs.delete(controlFile, true); Random random = new Random(seed); SequenceFile.Writer writer = SequenceFile.createWriter( fs, conf, controlFile, Text.class, LongWritable.class, CompressionType.NONE); long totalSize = 0; long maxSize = ((megaBytes / numFiles) * 2) + 1; try { while (totalSize < megaBytes) { Text name = new Text(Long.toString(random.nextLong())); long size = random.nextLong(); if (size < 0) size = -size; size = size % maxSize; // LOG.info(" adding: name="+name+" size="+size); writer.append(name, new LongWritable(size)); totalSize += size; } } finally { writer.close(); } LOG.info("created control file for: " + totalSize + " bytes"); }
/**
 * Adds up all values of a key, each parsed as a decimal long, and emits the total as text
 * under the same key.
 *
 * @param key key shared by all values
 * @param iterator values to sum; each must be parseable by {@link Long#parseLong(String)}
 * @param output collector receiving the single (key, total) pair
 * @param reporter not used
 * @throws IOException if the collector fails
 */
@Override
public void reduce(
    Text key, Iterator<Text> iterator, OutputCollector<Text, Text> output, Reporter reporter)
    throws IOException {
  long total = 0;
  while (iterator.hasNext()) {
    Text current = iterator.next();
    total += Long.parseLong(current.toString());
  }
  Text result = new Text(Long.toString(total));
  output.collect(key, result);
}
/**
 * Reads the file named by {@code key} sequentially and verifies its contents against data
 * regenerated from the random generator.
 *
 * <p>The file name is parsed as a long and used as the seed. The file is read in chunks of at
 * most {@code buffer.length} bytes; for each chunk the expected bytes are regenerated into
 * {@code check} (one random byte value repeated when {@code fastCheck} is set, otherwise
 * {@code random.nextBytes}). For a short final chunk the tails of both arrays are zeroed so the
 * whole-array comparison only reflects the bytes actually read.
 *
 * @param key file name under {@code DATA_DIR}; must be a decimal long
 * @param value number of bytes to read and verify
 * @param collector receives one {@code ("bytes", bytesRead)} pair
 * @param reporter receives progress status strings
 * @throws IOException if the file cannot be opened or fully read
 */
public void map(
    Text key, LongWritable value, OutputCollector<Text, LongWritable> collector, Reporter reporter)
    throws IOException {
  final String fileName = key.toString();
  final long expectedBytes = value.get();
  random.setSeed(Long.parseLong(fileName));

  reporter.setStatus("opening " + fileName);
  DataInputStream in = new DataInputStream(fs.open(new Path(DATA_DIR, fileName)));

  long consumed = 0;
  try {
    while (consumed < expectedBytes) {
      // Never read more than one buffer, and never past the expected size.
      final int chunk = (int) Math.min(expectedBytes - consumed, (long) buffer.length);
      in.readFully(buffer, 0, chunk);
      consumed += chunk;

      // Regenerate the expected content for this chunk.
      if (!fastCheck) {
        random.nextBytes(check);
      } else {
        Arrays.fill(check, (byte) random.nextInt(Byte.MAX_VALUE));
      }

      // Short final chunk: blank the unused tails so both arrays compare equal there.
      if (chunk != buffer.length) {
        Arrays.fill(buffer, chunk, buffer.length, (byte) 0);
        Arrays.fill(check, chunk, check.length, (byte) 0);
      }

      assertTrue(Arrays.equals(buffer, check));
      reporter.setStatus("reading " + fileName + "@" + consumed + "/" + expectedBytes);
    }
  } finally {
    in.close();
  }

  collector.collect(new Text("bytes"), new LongWritable(consumed));
  reporter.setStatus("read " + fileName);
}
public void map( Text key, LongWritable value, OutputCollector<K, LongWritable> collector, Reporter reporter) throws IOException { String name = key.toString(); long size = value.get(); long seed = Long.parseLong(name); if (size == 0) return; reporter.setStatus("opening " + name); FSDataInputStream in = fs.open(new Path(DATA_DIR, name)); try { for (int i = 0; i < SEEKS_PER_FILE; i++) { // generate a random position long position = Math.abs(random.nextLong()) % size; // seek file to that position reporter.setStatus("seeking " + name); in.seek(position); byte b = in.readByte(); // check that byte matches byte checkByte = 0; // advance random state to that position random.setSeed(seed); for (int p = 0; p <= position; p += check.length) { reporter.setStatus("generating data for " + name); if (fastCheck) { checkByte = (byte) random.nextInt(Byte.MAX_VALUE); } else { random.nextBytes(check); checkByte = check[(int) (position % check.length)]; } } assertEquals(b, checkByte); } } finally { in.close(); } }
public void map( Text key, LongWritable value, OutputCollector<Text, LongWritable> collector, Reporter reporter) throws IOException { String name = key.toString(); long size = value.get(); long seed = Long.parseLong(name); random.setSeed(seed); reporter.setStatus("creating " + name); // write to temp file initially to permit parallel execution Path tempFile = new Path(DATA_DIR, name + suffix); OutputStream out = fs.create(tempFile); long written = 0; try { while (written < size) { if (fastCheck) { Arrays.fill(buffer, (byte) random.nextInt(Byte.MAX_VALUE)); } else { random.nextBytes(buffer); } long remains = size - written; int length = (remains <= buffer.length) ? (int) remains : buffer.length; out.write(buffer, 0, length); written += length; reporter.setStatus("writing " + name + "@" + written + "/" + size); } } finally { out.close(); } // rename to final location fs.rename(tempFile, new Path(DATA_DIR, name)); collector.collect(new Text("bytes"), new LongWritable(written)); reporter.setStatus("wrote " + name); }
/**
 * Logs the statistics of one task and emits them as typed key/value pairs.
 *
 * <p>Five pairs are collected: {@code tasks} (always 1), {@code size}, and {@code time} under
 * the long value-type prefix, and {@code rate} and {@code sqrate} under the float value-type
 * prefix. The rate is {@code totalSize * 1000 / (execTime * MEGA)}; both {@code rate} and
 * {@code sqrate} are emitted multiplied by 1000.
 *
 * @param output collector receiving the statistics
 * @param name not used
 * @param execTime execution time of the task (used as milliseconds in the rate formula)
 * @param objSize number of bytes processed by the task
 * @throws IOException if the collector fails
 */
void collectStats(OutputCollector<Text, Text> output, String name, long execTime, Long objSize)
    throws IOException {
  final long bytesProcessed = objSize.longValue();
  final float ioRateMbSec = (float) bytesProcessed * 1000 / (execTime * MEGA);

  LOG.info("Number of bytes processed = " + bytesProcessed);
  LOG.info("Exec time = " + execTime);
  LOG.info("IO rate = " + ioRateMbSec);

  final float scaledRate = ioRateMbSec * 1000;
  final float scaledSquaredRate = ioRateMbSec * ioRateMbSec * 1000;

  // Long-typed statistics.
  Text tasksKey = new Text(AccumulatingReducer.VALUE_TYPE_LONG + "tasks");
  output.collect(tasksKey, new Text(Integer.toString(1)));

  Text sizeKey = new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size");
  output.collect(sizeKey, new Text(Long.toString(bytesProcessed)));

  Text timeKey = new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time");
  output.collect(timeKey, new Text(Long.toString(execTime)));

  // Float-typed statistics.
  Text rateKey = new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate");
  output.collect(rateKey, new Text(Float.toString(scaledRate)));

  Text sqrateKey = new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate");
  output.collect(sqrateKey, new Text(Float.toString(scaledSquaredRate)));
}
public void map( LongWritable key, Text value, OutputCollector<IntWritable, ClusterWritable> output, Reporter reporter) throws IOException { String movieIdStr = new String(); String reviewStr = new String(); String userIdStr = new String(); String reviews = new String(); String line = new String(); String tok = new String(""); long movieId; int review, userId, p, q, r, rater, rating, movieIndex; int clusterId = 0; int[] n = new int[maxClusters]; float[] sq_a = new float[maxClusters]; float[] sq_b = new float[maxClusters]; float[] numer = new float[maxClusters]; float[] denom = new float[maxClusters]; float max_similarity = 0.0f; float similarity = 0.0f; Cluster movie = new Cluster(); ClusterWritable movies_arrl = new ClusterWritable(); StringBuffer sb = new StringBuffer(); line = ((Text) value).toString(); movieIndex = line.indexOf(":"); for (r = 0; r < maxClusters; r++) { numer[r] = 0.0f; denom[r] = 0.0f; sq_a[r] = 0.0f; sq_b[r] = 0.0f; n[r] = 0; } if (movieIndex > 0) { movieIdStr = line.substring(0, movieIndex); sb.append(movieIdStr).append(":"); movieId = Long.parseLong(movieIdStr); movie.movie_id = movieId; reviews = line.substring(movieIndex + 1); StringTokenizer token = new StringTokenizer(reviews, ","); int attrCnt = 0; // while (token.hasMoreTokens()) { Leo while (token.hasMoreTokens() && attrCnt < attrNum) { tok = token.nextToken(); int reviewIndex = tok.indexOf("_"); // userIdStr = tok.substring(0, reviewIndex); //Leo userIdStr = String.valueOf(attrCnt); reviewStr = tok.substring(reviewIndex + 1); if (attrCnt > 0) { sb.append(","); } sb.append(String.valueOf(attrCnt)).append("_").append(reviewStr); userId = Integer.parseInt(userIdStr); review = Integer.parseInt(reviewStr); for (r = 0; r < totalClusters; r++) { /*for (q = 0; q < centroids_ref[r].total; q++) { rater = centroids_ref[r].reviews.get(q).rater_id; rating = (int) centroids_ref[r].reviews.get(q).rating; if (userId == rater) { numer[r] += (float) (review * rating); sq_a[r] += (float) (review * 
review); sq_b[r] += (float) (rating * rating); n[r]++; // counter break; // to avoid multiple ratings by the same reviewer } }*/ // Leo rating = (int) centroids_ref[r].reviews.get(attrCnt).rating; numer[r] += (float) ((review - rating) * (review - rating)); n[r]++; // counter } attrCnt++; } for (p = 0; p < totalClusters; p++) { /*denom[p] = (float) ((Math.sqrt((double) sq_a[p])) * (Math .sqrt((double) sq_b[p]))); if (denom[p] > 0) { similarity = numer[p] / denom[p]; if (similarity > max_similarity) { max_similarity = similarity; clusterId = p; } }*/ // Leo similarity = 250 - numer[p]; if (similarity > max_similarity) { max_similarity = similarity; clusterId = p; } } // movies_arrl.movies.add(line);//Leo movies_arrl.movies.add(sb.toString()); movies_arrl.similarities.add(max_similarity); movies_arrl.similarity = max_similarity; output.collect(new IntWritable(clusterId), movies_arrl); reporter.incrCounter(Counter.WORDS, 1); } }