/**
 * Initialises this task from the job configuration.
 *
 * <p>Prints the value of {@code mapred.task.id}, obtains the {@link FileSystem} for the job
 * configuration, and loads the block geometry and cache flag:
 * <ul>
 *   <li>{@code matmul.num.rows.in.block} (default 1) becomes {@code numRowsInBlock};</li>
 *   <li>{@code matmul.block.size} (default 1) divided by {@code numRowsInBlock}, using integer
 *       division, becomes {@code numColsInBlock};</li>
 *   <li>{@code matmul.useCache} (default {@code true}) becomes {@code useCache}.</li>
 * </ul>
 *
 * <p>NOTE(review): a configured row count of 0 makes the division below throw
 * {@link ArithmeticException}.
 *
 * @param context task context supplying the job configuration
 * @throws IOException if the file system cannot be obtained
 * @throws InterruptedException declared for the caller's benefit; not thrown by this body
 */
public void setup(Context context) throws IOException, InterruptedException {
    final Configuration jobConf = context.getConfiguration();

    final String taskId = jobConf.get("mapred.task.id");
    System.out.println("tid: " + taskId);

    fs = FileSystem.get(jobConf);

    final int configuredBlockSize = jobConf.getInt("matmul.block.size", 1);
    numRowsInBlock = jobConf.getInt("matmul.num.rows.in.block", 1);
    // Integer division: any remainder of block size / rows is discarded.
    numColsInBlock = configuredBlockSize / numRowsInBlock;

    useCache = jobConf.getBoolean("matmul.useCache", true);
}
public void map(final String[] indices, final TrackedSegments trSegs, final Context context) throws IOException, InterruptedException { System.out.println("working on: " + indices[0] + "\t" + indices[1]); int AIdx = 0, BIdx = 0; if (indices[0].contains("A")) { AIdx = 0; BIdx = 1; } else { AIdx = 1; BIdx = 0; } Segment[] segments = trSegs.segments; Segment segA = segments[AIdx]; Segment segB = segments[BIdx]; FSDataInputStream in; DataInputStream dataIn; Configuration conf = context.getConfiguration(); long versionId = conf.getLong("matmul.versionId", 0); long start, end; // read the B segment into memory int size = numRowsInBlock * numColsInBlock; double[] matrixBlockB = new double[size]; start = System.currentTimeMillis(); if (useCache) { in = fs.openCachedReadOnly(segB.getPath(), versionId); } else { in = fs.open(segB.getPath()); } in.seek(segB.getOffset()); // dataIn = new DataInputStream(new BufferedInputStream(in)); dataIn = new DataInputStream(in); for (int i = 0; i < size; ++i) { matrixBlockB[i] = dataIn.readDouble(); } in.close(); end = System.currentTimeMillis(); System.out.println("matrixB read time: " + (end - start) + " ms"); System.out.println( "matrixB read bandwidth: " + size * 8 / (end - start) / 1000 + " MBytes/s"); // prepare for context write String CRowId = indices[AIdx].split("_")[2]; String CColId = indices[BIdx].split("_")[2]; String outName = "C_" + CRowId + "_" + CColId; Path outPath = new Path(FileOutputFormat.getWorkOutputPath(context), outName); BufferedOutputStream out = new BufferedOutputStream(fs.create(outPath)); DataOutputStream dataOut = new DataOutputStream(out); // do the multiplication if (useCache) { in = fs.openCachedReadOnly(segA.getPath(), versionId); } else { in = fs.open(segA.getPath()); } in.seek(segA.getOffset()); // dataIn = new DataInputStream(new BufferedInputStream(in)); dataIn = new DataInputStream(in); long readTime = 0, calcTime = 0; for (int i = 0; i < numRowsInBlock; ++i) { double[] rowA = new 
double[numColsInBlock]; start = System.currentTimeMillis(); for (int j = 0; j < numColsInBlock; ++j) { rowA[j] = dataIn.readDouble(); } end = System.currentTimeMillis(); readTime += end - start; // caclulate out[i, :] start = System.currentTimeMillis(); for (int j = 0; j < numRowsInBlock; ++j) { // calculate out [i, j] double sum = 0; for (int k = 0; k < numColsInBlock; ++k) { sum += rowA[k] * matrixBlockB[j * numColsInBlock + k]; } dataOut.writeDouble(sum); } end = System.currentTimeMillis(); calcTime += end - start; } in.close(); dataOut.close(); System.out.println("matrixA read time: " + readTime + " ms"); System.out.println("matrixA read bandwidth: " + size * 8 / readTime / 1000 + " MBytes/s"); System.out.println("multiplication calc time: " + calcTime + " ms"); }