/**
 * Prepares this task's fields from the job configuration before any records are mapped.
 *
 * <p>Three fields are assigned, in this order: the Avro map output schema looked up from the
 * configuration, a {@code LargeObjectLoader} built from the configuration and the task's work
 * output path, and the flag read from {@code ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT}.
 *
 * @param context the task context supplying the configuration and the work output path
 * @throws IOException declared by the signature; may be raised by the calls made here
 * @throws InterruptedException declared by the signature; may be raised by the calls made here
 */
@Override
 protected void setup(Context context) throws IOException, InterruptedException {
   final Configuration jobConf = context.getConfiguration();

   // Schema used for the map output, as registered on the job configuration.
   schema = AvroJob.getMapOutputSchema(jobConf);

   // The loader is handed the task's work output path alongside the configuration.
   lobLoader = new LargeObjectLoader(jobConf, FileOutputFormat.getWorkOutputPath(context));

   // Falls back to the project-defined default when the property is not set.
   final boolean formatFlag =
       jobConf.getBoolean(
           ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
           ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);
   bigDecimalFormatString = formatFlag;
 }
Beispiel #2
0
    /**
     * Multiplies one block read from segment A with one block read from segment B and writes the
     * resulting values to a file named {@code C_<row>_<col>} under the task's work output path.
     *
     * <p>{@code indices} holds two block identifiers. If the first one contains "A", then
     * {@code trSegs.segments[0]} is used as the A segment and {@code trSegs.segments[1]} as the B
     * segment; otherwise the roles are swapped. The third "_"-separated field of the A identifier
     * and of the B identifier form the row and column part of the output file name.
     *
     * <p>The B block ({@code numRowsInBlock * numColsInBlock} doubles) is read fully into memory;
     * A is streamed one row of {@code numColsInBlock} doubles at a time. Output value (i, j) is the
     * dot product of row i of A with the j-th consecutive run of {@code numColsInBlock} values of
     * B. NOTE(review): this indexing treats B as if it were stored transposed - confirm against
     * the code that writes the segments.
     *
     * <p>Read times, read bandwidths and the calculation time are printed to standard output.
     * Every stream opened here is closed even when a read or write fails.
     *
     * @param indices two block identifiers; each must have at least three "_"-separated fields
     * @param trSegs carrier of the two segments, in the same order as {@code indices}
     * @param context task context supplying the configuration and the work output path
     * @throws IOException if opening, seeking, reading or writing any of the streams fails
     * @throws InterruptedException declared by the signature; propagated if a call made here
     *     raises it
     */
    public void map(final String[] indices, final TrackedSegments trSegs, final Context context)
        throws IOException, InterruptedException {

      System.out.println("working on: " + indices[0] + "\t" + indices[1]);

      // Whichever position holds the "A" identifier selects the A segment; the other one is B.
      final int aIdx = indices[0].contains("A") ? 0 : 1;
      final int bIdx = 1 - aIdx;

      final Segment[] segments = trSegs.segments;
      final Segment segA = segments[aIdx];
      final Segment segB = segments[bIdx];

      final Configuration conf = context.getConfiguration();
      final long versionId = conf.getLong("matmul.versionId", 0);

      final int size = numRowsInBlock * numColsInBlock;
      // 8 bytes per double; computed in long so that large blocks do not overflow int.
      final long blockBytes = (long) size * 8;

      // read the B segment into memory
      final double[] matrixBlockB = new double[size];
      long start = System.currentTimeMillis();
      final FSDataInputStream inB = openSegment(segB, versionId);
      try {
        inB.seek(segB.getOffset());
        final DataInputStream dataInB = new DataInputStream(inB);
        for (int i = 0; i < size; ++i) {
          matrixBlockB[i] = dataInB.readDouble();
        }
      } finally {
        inB.close();
      }
      long end = System.currentTimeMillis();
      System.out.println("matrixB read time: " + (end - start) + " ms");
      System.out.println(
          "matrixB read bandwidth: " + bandwidthMBytesPerSec(blockBytes, end - start)
              + " MBytes/s");

      // prepare for context write
      final String cRowId = indices[aIdx].split("_")[2];
      final String cColId = indices[bIdx].split("_")[2];
      final String outName = "C_" + cRowId + "_" + cColId;
      final Path outPath = new Path(FileOutputFormat.getWorkOutputPath(context), outName);
      final DataOutputStream dataOut =
          new DataOutputStream(new BufferedOutputStream(fs.create(outPath)));

      // do the multiplication
      long readTime = 0;
      long calcTime = 0;
      try {
        final FSDataInputStream inA = openSegment(segA, versionId);
        try {
          inA.seek(segA.getOffset());
          final DataInputStream dataInA = new DataInputStream(inA);
          // One reusable row buffer: every element is overwritten before it is read again.
          final double[] rowA = new double[numColsInBlock];
          for (int i = 0; i < numRowsInBlock; ++i) {
            start = System.currentTimeMillis();
            for (int j = 0; j < numColsInBlock; ++j) {
              rowA[j] = dataInA.readDouble();
            }
            end = System.currentTimeMillis();
            readTime += end - start;

            // calculate out[i, :]
            start = System.currentTimeMillis();
            for (int j = 0; j < numRowsInBlock; ++j) {
              // calculate out[i, j]
              double sum = 0;
              final int rowOffsetB = j * numColsInBlock;
              for (int k = 0; k < numColsInBlock; ++k) {
                sum += rowA[k] * matrixBlockB[rowOffsetB + k];
              }
              dataOut.writeDouble(sum);
            }
            end = System.currentTimeMillis();
            calcTime += end - start;
          }
        } finally {
          inA.close();
        }
      } finally {
        // Closing also flushes the buffered output.
        dataOut.close();
      }
      System.out.println("matrixA read time: " + readTime + " ms");
      System.out.println(
          "matrixA read bandwidth: " + bandwidthMBytesPerSec(blockBytes, readTime) + " MBytes/s");
      System.out.println("multiplication calc time: " + calcTime + " ms");
    }

    /** Opens the file behind {@code seg}, using the cached read-only open when useCache is set. */
    private FSDataInputStream openSegment(final Segment seg, final long versionId)
        throws IOException, InterruptedException {
      if (useCache) {
        return fs.openCachedReadOnly(seg.getPath(), versionId);
      }
      return fs.open(seg.getPath());
    }

    /**
     * Returns {@code bytes / elapsedMillis / 1000} using integer division. An elapsed time below
     * one millisecond is counted as one millisecond so the division can never be by zero.
     */
    private long bandwidthMBytesPerSec(final long bytes, final long elapsedMillis) {
      return bytes / Math.max(1L, elapsedMillis) / 1000;
    }