/**
   * Runs the weighted cross-entropy (wcemm) quaternary-operator test for the given configuration.
   *
   * @param testname name of the DML/R test script to run
   * @param sparse whether the input matrix X is generated sparse
   * @param rewrites whether algebraic simplification rewrites are enabled
   * @param rep whether replication-based quaternary operators are forced (QuaternaryOp.FORCE_REPLICATION)
   * @param instType execution type (CP, MR, or SPARK)
   */
  private void runWeightedCrossEntropyTest(
      String testname, boolean sparse, boolean rewrites, boolean rep, ExecType instType) {
    RUNTIME_PLATFORM platformOld = rtplatform;
    switch (instType) {
      case MR:
        rtplatform = RUNTIME_PLATFORM.HADOOP;
        break;
      case SPARK:
        rtplatform = RUNTIME_PLATFORM.SPARK;
        break;
      default:
        rtplatform = RUNTIME_PLATFORM.HYBRID;
        break;
    }

    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) DMLScript.USE_LOCAL_SPARK_CONFIG = true;

    boolean rewritesOld = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
    boolean forceOld = QuaternaryOp.FORCE_REPLICATION;

    OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
    QuaternaryOp.FORCE_REPLICATION = rep;

    try {
      double sparsity = (sparse) ? spSparse : spDense;
      String TEST_NAME = testname;

      TestConfiguration config = getTestConfiguration(TEST_NAME);

      // This is for running the junit test the new way, i.e., construct the arguments directly
      String HOME = SCRIPT_DIR + TEST_DIR;
      fullDMLScriptName = HOME + TEST_NAME + ".dml";
      programArgs =
          new String[] {
            "-stats",
            "-explain",
            "runtime",
            "-args",
            HOME + INPUT_DIR + "X",
            HOME + INPUT_DIR + "U",
            HOME + INPUT_DIR + "V",
            HOME + OUTPUT_DIR + "R"
          };
      fullRScriptName = HOME + TEST_NAME + ".R";
      rCmd = "Rscript" + " " + fullRScriptName + " " + HOME + INPUT_DIR + " " + HOME + EXPECTED_DIR;

      loadTestConfiguration(config);

      // generate actual dataset
      double[][] X = getRandomMatrix(rows, cols, 0, 1, sparsity, 7);
      writeInputMatrixWithMTD("X", X, true);
      double[][] U = getRandomMatrix(rows, rank, 0, 1, 1.0, 678);
      writeInputMatrixWithMTD("U", U, true);
      double[][] V = getRandomMatrix(cols, rank, 0, 1, 1.0, 912);
      writeInputMatrixWithMTD("V", V, true);

      // run the scripts
      runTest(true, false, null, -1);
      runRScript(true);

      // compare matrices
      HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("R");
      HashMap<CellIndex, Double> rfile = readRMatrixFromFS("R");
      TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
      checkDMLMetaDataFile("R", new MatrixCharacteristics(1, 1, 1, 1));

      // check statistics for right operator in cp
      if (instType == ExecType.CP && rewrites)
        Assert.assertTrue(
            "Missing opcode wcemm",
            Statistics.getCPHeavyHitterOpCodes().contains(WeightedCrossEntropy.OPCODE_CP));
      else if (instType == ExecType.SPARK && rewrites) {
        Assert.assertTrue(
            "Missing opcode sp_wcemm",
            !rep
                    && Statistics.getCPHeavyHitterOpCodes()
                        .contains(Instruction.SP_INST_PREFIX + WeightedCrossEntropy.OPCODE)
                || rep
                    && Statistics.getCPHeavyHitterOpCodes()
                        .contains(Instruction.SP_INST_PREFIX + WeightedCrossEntropyR.OPCODE));
      }
    } finally {
      rtplatform = platformOld;
      DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
      OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewritesOld;
      QuaternaryOp.FORCE_REPLICATION = forceOld;
    }
  }
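
  /*
   * A minimal sketch of how the helper above is typically invoked from a JUnit test
   * method. The script name "WeightedCeMM" and the chosen flag combination are
   * assumptions for illustration; the real test class defines its own TEST_NAME
   * constants and enumerates the dense/sparse, rewrite, and backend combinations.
   */
  @Test
  public void testWeightedCeMMDenseRewritesCPExample() {
    // dense input, rewrites enabled, no forced replication, CP backend
    runWeightedCrossEntropyTest("WeightedCeMM", false, true, false, ExecType.CP);
  }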
  /**
   * Runs the min/max comparison test for the given operand data types and sparsity.
   *
   * @param type operation type (MIN or MAX)
   * @param dtM1 data type of the first operand (MATRIX or SCALAR)
   * @param dtM2 data type of the second operand (MATRIX or SCALAR)
   * @param sparseM1 whether the first matrix operand is generated sparse
   * @param sparseM2 whether the second matrix operand is generated sparse
   * @param instType execution type (CP, MR, or SPARK)
   */
  private void runMinMaxComparisonTest(
      OpType type,
      DataType dtM1,
      DataType dtM2,
      boolean sparseM1,
      boolean sparseM2,
      ExecType instType) {
    // rtplatform for MR
    RUNTIME_PLATFORM platformOld = rtplatform;
    rtplatform = (instType == ExecType.MR) ? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.HYBRID;

    // get the testname
    String TEST_NAME = null;
    int minFlag = (type == OpType.MIN) ? 1 : 0;
    boolean s1Flag = (dtM1 == DataType.SCALAR);
    boolean s2Flag = (dtM2 == DataType.SCALAR);

    if (s1Flag && s2Flag) TEST_NAME = TEST_NAME4;
    else if (s1Flag) TEST_NAME = TEST_NAME2;
    else if (s2Flag) TEST_NAME = TEST_NAME3;
    else TEST_NAME = TEST_NAME1;

    String TEST_CACHE_DIR = "";
    if (TEST_CACHE_ENABLED) {
      int mrows1 = (dtM1 == DataType.MATRIX) ? rows : 1;
      int mrows2 = (dtM2 == DataType.MATRIX) ? rows : 1;

      double sparsityLeft = sparseM1 ? sparsity2 : sparsity1;
      double sparsityRight = sparseM2 ? sparsity2 : sparsity1;

      TEST_CACHE_DIR =
          minFlag + "_" + mrows1 + "_" + mrows2 + "_" + sparsityLeft + "_" + sparsityRight + "/";
    }

    try {
      TestConfiguration config = getTestConfiguration(TEST_NAME);
      loadTestConfiguration(config, TEST_CACHE_DIR);

      // This is for running the junit test the new way, i.e., construct the arguments directly
      String HOME = SCRIPT_DIR + TEST_DIR;
      fullDMLScriptName = HOME + TEST_NAME + ".dml";
      programArgs =
          new String[] {
            "-explain", "-args", input("A"), input("B"), Integer.toString(minFlag), output("C")
          };

      fullRScriptName = HOME + TEST_NAME_R + ".R";
      rCmd =
          "Rscript"
              + " "
              + fullRScriptName
              + " "
              + inputDir()
              + " "
              + minFlag
              + " "
              + expectedDir();

      // generate actual dataset
      int mrows1 = (dtM1 == DataType.MATRIX) ? rows : 1;
      int mcols1 = (dtM1 == DataType.MATRIX) ? cols : 1;
      int mrows2 = (dtM2 == DataType.MATRIX) ? rows : 1;
      int mcols2 = (dtM2 == DataType.MATRIX) ? cols : 1;
      double[][] A = getRandomMatrix(mrows1, mcols1, -1, 1, sparseM1 ? sparsity2 : sparsity1, 7);
      writeInputMatrix("A", A, true);
      MatrixCharacteristics mc1 = new MatrixCharacteristics(mrows1, mcols1, 1000, 1000);
      MapReduceTool.writeMetaDataFile(
          input("A.mtd"), ValueType.DOUBLE, mc1, OutputInfo.TextCellOutputInfo);

      double[][] B = getRandomMatrix(mrows2, mcols2, -1, 1, sparseM2 ? sparsity2 : sparsity1, 3);
      writeInputMatrix("B", B, true);
      MatrixCharacteristics mc2 = new MatrixCharacteristics(mrows2, mcols2, 1000, 1000);
      MapReduceTool.writeMetaDataFile(
          input("B.mtd"), ValueType.DOUBLE, mc2, OutputInfo.TextCellOutputInfo);

      // run test
      runTest(true, false, null, -1);
      runRScript(true);

      // compare matrices
      HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("C");
      HashMap<CellIndex, Double> rfile = readRMatrixFromFS("C");
      TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
    } catch (IOException e) {
      e.printStackTrace();
      throw new RuntimeException(e);
    } finally {
      rtplatform = platformOld;
    }
  }
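
  /*
   * A minimal sketch of a caller for the helper above, using OpType.MIN and
   * DataType.MATRIX as they appear in the helper itself; the CP backend and the
   * dense/dense combination are illustrative choices. The real test class
   * enumerates the matrix/scalar and dense/sparse combinations in its @Test methods.
   */
  @Test
  public void testMinMatrixMatrixDenseCPExample() {
    // matrix-matrix min, both operands dense, CP backend
    runMinMaxComparisonTest(
        OpType.MIN, DataType.MATRIX, DataType.MATRIX, false, false, ExecType.CP);
  }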
  /**
   * Runs the column aggregate (colSums/colMeans/colMax/colMin) test for the given configuration.
   *
   * @param type column aggregate operation (COL_SUMS, COL_MEANS, COL_MAX, or COL_MIN)
   * @param sparse whether the input matrix is generated sparse
   * @param vector whether the column-vector variant of the input is used
   * @param instType execution type (CP, MR, or SPARK)
   * @param rewrites whether algebraic simplification rewrites are enabled
   */
  private void runColAggregateOperationTest(
      OpType type, boolean sparse, boolean vector, ExecType instType, boolean rewrites) {
    RUNTIME_PLATFORM platformOld = rtplatform;
    switch (instType) {
      case MR:
        rtplatform = RUNTIME_PLATFORM.HADOOP;
        break;
      case SPARK:
        rtplatform = RUNTIME_PLATFORM.SPARK;
        break;
      default:
        rtplatform = RUNTIME_PLATFORM.HYBRID;
        break;
    }

    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) DMLScript.USE_LOCAL_SPARK_CONFIG = true;

    boolean oldRewritesFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
    OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;

    try {
      String TEST_NAME = null;
      switch (type) {
        case COL_SUMS:
          TEST_NAME = TEST_NAME1;
          break;
        case COL_MEANS:
          TEST_NAME = TEST_NAME2;
          break;
        case COL_MAX:
          TEST_NAME = TEST_NAME3;
          break;
        case COL_MIN:
          TEST_NAME = TEST_NAME4;
          break;
      }

      int cols = (vector) ? cols1 : cols2;
      double sparsity = (sparse) ? sparsity1 : sparsity2;

      getAndLoadTestConfiguration(TEST_NAME);

      /* This is for running the junit test the new way, i.e., construct the arguments directly */
      String HOME = SCRIPT_DIR + TEST_DIR;
      fullDMLScriptName = HOME + TEST_NAME + ".dml";
      programArgs =
          new String[] {
            "-args", input("A"), Integer.toString(rows), Integer.toString(cols), output("B")
          };

      fullRScriptName = HOME + TEST_NAME + ".R";
      rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();

      // generate actual dataset
      double[][] A = getRandomMatrix(rows, cols, -0.05, 1, sparsity, 7);
      writeInputMatrix("A", A, true);

      boolean exceptionExpected = false;
      runTest(true, exceptionExpected, null, -1);

      runRScript(true);

      // compare matrices
      HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("B");
      HashMap<CellIndex, Double> rfile = readRMatrixFromFS("B");
      TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
    } finally {
      rtplatform = platformOld;
      DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
      OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldRewritesFlag;
    }
  }
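
  /*
   * A minimal sketch of a caller for the helper above; OpType.COL_SUMS is taken
   * from the switch statement in the helper, while the dense multi-column input,
   * disabled rewrites, and CP backend are illustrative choices.
   */
  @Test
  public void testColSumsDenseMatrixCPExample() {
    // colSums over a dense multi-column matrix, rewrites disabled, CP backend
    runColAggregateOperationTest(OpType.COL_SUMS, false, false, ExecType.CP, false);
  }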
  /**
   * Runs the GDFO test for the given script and configuration.
   *
   * @param testname name of the DML/R test script to run
   * @param sparse whether the input matrix X is generated sparse
   * @param instType execution type (CP, MR, or SPARK)
   */
  private void runGDFOTest(String testname, boolean sparse, ExecType instType) {
    // rtplatform for MR
    RUNTIME_PLATFORM platformOld = rtplatform;
    switch (instType) {
      case MR:
        rtplatform = RUNTIME_PLATFORM.HADOOP;
        break;
      case SPARK:
        rtplatform = RUNTIME_PLATFORM.SPARK;
        break;
      default:
        rtplatform = RUNTIME_PLATFORM.HYBRID;
        break;
    }

    boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
    if (rtplatform == RUNTIME_PLATFORM.SPARK) DMLScript.USE_LOCAL_SPARK_CONFIG = true;

    try {
      String TEST_NAME = testname;
      TestConfiguration config = getTestConfiguration(TEST_NAME);

      /* This is for running the junit test the new way, i.e., construct the arguments directly */
      String HOME = SCRIPT_DIR + TEST_DIR;
      fullDMLScriptName = HOME + TEST_NAME + ".dml";
      programArgs =
          new String[] {
            "-explain", // "hops",
            "-config=" + HOME + TEST_CONF,
            "-args",
            HOME + INPUT_DIR + "X",
            HOME + INPUT_DIR + "y",
            String.valueOf(intercept),
            String.valueOf(epsilon),
            String.valueOf(maxiter),
            HOME + OUTPUT_DIR + "w"
          };
      fullRScriptName = HOME + TEST_NAME + ".R";
      rCmd =
          "Rscript"
              + " "
              + fullRScriptName
              + " "
              + HOME
              + INPUT_DIR
              + " "
              + String.valueOf(intercept)
              + " "
              + String.valueOf(epsilon)
              + " "
              + String.valueOf(maxiter)
              + " "
              + HOME
              + EXPECTED_DIR;

      loadTestConfiguration(config);

      // generate actual datasets
      double[][] X = getRandomMatrix(rows, cols, 0, 1, sparse ? sparsity2 : sparsity1, 7);
      writeInputMatrixWithMTD("X", X, true);
      double[][] y = getRandomMatrix(rows, 1, 0, 10, 1.0, 3);
      writeInputMatrixWithMTD("y", y, true);

      runTest(true, false, null, -1);
      runRScript(true);

      // compare matrices
      HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("w");
      HashMap<CellIndex, Double> rfile = readRMatrixFromFS("w");
      TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
    } finally {
      rtplatform = platformOld;
      DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
    }
  }
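
  /*
   * A minimal sketch of a caller for the helper above. The script name "LinregDS"
   * is a placeholder for illustration only; the real test class passes its own
   * TEST_NAME constants and typically exercises the HYBRID and SPARK backends
   * in addition to CP.
   */
  @Test
  public void testGDFODenseCPExample() {
    // dense input, CP backend
    runGDFOTest("LinregDS", false, ExecType.CP);
  }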