/** * @param testname * @param sparse * @param rewrites * @param rep * @param instType */ private void runWeightedCrossEntropyTest( String testname, boolean sparse, boolean rewrites, boolean rep, ExecType instType) { RUNTIME_PLATFORM platformOld = rtplatform; switch (instType) { case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break; case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break; default: rtplatform = RUNTIME_PLATFORM.HYBRID; break; } boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG; if (rtplatform == RUNTIME_PLATFORM.SPARK) DMLScript.USE_LOCAL_SPARK_CONFIG = true; boolean rewritesOld = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION; boolean forceOld = QuaternaryOp.FORCE_REPLICATION; OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites; QuaternaryOp.FORCE_REPLICATION = rep; try { double sparsity = (sparse) ? spSparse : spDense; String TEST_NAME = testname; TestConfiguration config = getTestConfiguration(TEST_NAME); // This is for running the junit test the new way, i.e., construct the arguments directly String HOME = SCRIPT_DIR + TEST_DIR; fullDMLScriptName = HOME + TEST_NAME + ".dml"; programArgs = new String[] { "-stats", "-explain", "runtime", "-args", HOME + INPUT_DIR + "X", HOME + INPUT_DIR + "U", HOME + INPUT_DIR + "V", HOME + OUTPUT_DIR + "R" }; fullRScriptName = HOME + TEST_NAME + ".R"; rCmd = "Rscript" + " " + fullRScriptName + " " + HOME + INPUT_DIR + " " + HOME + EXPECTED_DIR; loadTestConfiguration(config); // generate actual dataset double[][] X = getRandomMatrix(rows, cols, 0, 1, sparsity, 7); writeInputMatrixWithMTD("X", X, true); double[][] U = getRandomMatrix(rows, rank, 0, 1, 1.0, 678); writeInputMatrixWithMTD("U", U, true); double[][] V = getRandomMatrix(cols, rank, 0, 1, 1.0, 912); writeInputMatrixWithMTD("V", V, true); // run the scripts runTest(true, false, null, -1); runRScript(true); // compare matrices HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("R"); HashMap<CellIndex, Double> rfile = 
readRMatrixFromFS("R"); TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R"); checkDMLMetaDataFile("R", new MatrixCharacteristics(1, 1, 1, 1)); // check statistics for right operator in cp if (instType == ExecType.CP && rewrites) Assert.assertTrue( "Missing opcode wcemm", Statistics.getCPHeavyHitterOpCodes().contains(WeightedCrossEntropy.OPCODE_CP)); else if (instType == ExecType.SPARK && rewrites) { Assert.assertTrue( "Missing opcode sp_wcemm", !rep && Statistics.getCPHeavyHitterOpCodes() .contains(Instruction.SP_INST_PREFIX + WeightedCrossEntropy.OPCODE) || rep && Statistics.getCPHeavyHitterOpCodes() .contains(Instruction.SP_INST_PREFIX + WeightedCrossEntropyR.OPCODE)); } } finally { rtplatform = platformOld; DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld; OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewritesOld; QuaternaryOp.FORCE_REPLICATION = forceOld; } }
/**
 * Clears the recorded assertion information and registers the test configuration for
 * {@code TEST_NAME}, which declares {@code "R"} as its only output file.
 */
@Override
public void setUp() {
  TestUtils.clearAssertionInformation();

  final String[] outputFiles = {"R"};
  final TestConfiguration config = new TestConfiguration(TEST_DIR, TEST_NAME, outputFiles);
  addTestConfiguration(TEST_NAME, config);
}
/** * @param sparseM1 * @param sparseM2 * @param instType */ private void runColAggregateOperationTest( OpType type, boolean sparse, boolean vector, ExecType instType, boolean rewrites) { RUNTIME_PLATFORM platformOld = rtplatform; switch (instType) { case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break; case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break; default: rtplatform = RUNTIME_PLATFORM.HYBRID; break; } boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG; if (rtplatform == RUNTIME_PLATFORM.SPARK) DMLScript.USE_LOCAL_SPARK_CONFIG = true; boolean oldRewritesFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION; OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites; try { String TEST_NAME = null; switch (type) { case COL_SUMS: TEST_NAME = TEST_NAME1; break; case COL_MEANS: TEST_NAME = TEST_NAME2; break; case COL_MAX: TEST_NAME = TEST_NAME3; break; case COL_MIN: TEST_NAME = TEST_NAME4; break; } int cols = (vector) ? cols1 : cols2; double sparsity = (sparse) ? sparsity1 : sparsity2; getAndLoadTestConfiguration(TEST_NAME); /* This is for running the junit test the new way, i.e., construct the arguments directly */ String HOME = SCRIPT_DIR + TEST_DIR; fullDMLScriptName = HOME + TEST_NAME + ".dml"; programArgs = new String[] { "-args", input("A"), Integer.toString(rows), Integer.toString(cols), output("B") }; fullRScriptName = HOME + TEST_NAME + ".R"; rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir(); // generate actual dataset double[][] A = getRandomMatrix(rows, cols, -0.05, 1, sparsity, 7); writeInputMatrix("A", A, true); boolean exceptionExpected = false; runTest(true, exceptionExpected, null, -1); runRScript(true); // compare matrices HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("B"); HashMap<CellIndex, Double> rfile = readRMatrixFromFS("B"); TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R"); } finally { rtplatform = platformOld; DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld; 
OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldRewritesFlag; } }
/**
 * Clears the directory {@code TEST_DATA_DIR + TEST_CLASS_DIR} once the tests of this class have
 * run; does nothing unless {@code TEST_CACHE_ENABLED} is set.
 */
@AfterClass
public static void cleanUp() {
  if (!TEST_CACHE_ENABLED) {
    return;
  }
  TestUtils.clearDirectory(TEST_DATA_DIR + TEST_CLASS_DIR);
}
/**
 * Clears the directory {@code TEST_DATA_DIR + TEST_CLASS_DIR} before the tests of this class
 * run.
 */
@BeforeClass
public static void init() {
  final String classDataDir = TEST_DATA_DIR + TEST_CLASS_DIR;
  TestUtils.clearDirectory(classDataDir);
}
/** * @param sparseM1 * @param sparseM2 * @param instType */ private void runMinMaxComparisonTest( OpType type, DataType dtM1, DataType dtM2, boolean sparseM1, boolean sparseM2, ExecType instType) { // rtplatform for MR RUNTIME_PLATFORM platformOld = rtplatform; rtplatform = (instType == ExecType.MR) ? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.HYBRID; // get the testname String TEST_NAME = null; int minFlag = (type == OpType.MIN) ? 1 : 0; boolean s1Flag = (dtM1 == DataType.SCALAR); boolean s2Flag = (dtM2 == DataType.SCALAR); if (s1Flag && s2Flag) TEST_NAME = TEST_NAME4; else if (s1Flag) TEST_NAME = TEST_NAME2; else if (s2Flag) TEST_NAME = TEST_NAME3; else TEST_NAME = TEST_NAME1; String TEST_CACHE_DIR = ""; if (TEST_CACHE_ENABLED) { int mrows1 = (dtM1 == DataType.MATRIX) ? rows : 1; int mrows2 = (dtM2 == DataType.MATRIX) ? rows : 1; double sparsityLeft = sparseM1 ? sparsity2 : sparsity1; double sparsityRight = sparseM2 ? sparsity2 : sparsity1; TEST_CACHE_DIR = minFlag + "_" + mrows1 + "_" + mrows2 + "_" + sparsityLeft + "_" + sparsityRight + "/"; } try { TestConfiguration config = getTestConfiguration(TEST_NAME); loadTestConfiguration(config, TEST_CACHE_DIR); // This is for running the junit test the new way, i.e., construct the arguments directly String HOME = SCRIPT_DIR + TEST_DIR; fullDMLScriptName = HOME + TEST_NAME + ".dml"; programArgs = new String[] { "-explain", "-args", input("A"), input("B"), Integer.toString(minFlag), output("C") }; fullRScriptName = HOME + TEST_NAME_R + ".R"; rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + minFlag + " " + expectedDir(); // generate actual dataset int mrows1 = (dtM1 == DataType.MATRIX) ? rows : 1; int mcols1 = (dtM1 == DataType.MATRIX) ? cols : 1; int mrows2 = (dtM2 == DataType.MATRIX) ? rows : 1; int mcols2 = (dtM2 == DataType.MATRIX) ? cols : 1; double[][] A = getRandomMatrix(mrows1, mcols1, -1, 1, sparseM1 ? 
sparsity2 : sparsity1, 7); writeInputMatrix("A", A, true); MatrixCharacteristics mc1 = new MatrixCharacteristics(mrows1, mcols1, 1000, 1000); MapReduceTool.writeMetaDataFile( input("A.mtd"), ValueType.DOUBLE, mc1, OutputInfo.TextCellOutputInfo); double[][] B = getRandomMatrix(mrows2, mcols2, -1, 1, sparseM2 ? sparsity2 : sparsity1, 3); writeInputMatrix("B", B, true); MatrixCharacteristics mc2 = new MatrixCharacteristics(mrows2, mcols2, 1000, 1000); MapReduceTool.writeMetaDataFile( input("B.mtd"), ValueType.DOUBLE, mc2, OutputInfo.TextCellOutputInfo); // run test runTest(true, false, null, -1); runRScript(true); // compare matrices HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("C"); HashMap<CellIndex, Double> rfile = readRMatrixFromFS("C"); TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R"); } catch (IOException e) { e.printStackTrace(); throw new RuntimeException(e); } finally { rtplatform = platformOld; } }
/** * @param sparseM1 * @param sparseM2 * @param instType */ private void runGDFOTest(String testname, boolean sparse, ExecType instType) { // rtplatform for MR RUNTIME_PLATFORM platformOld = rtplatform; switch (instType) { case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break; case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break; default: rtplatform = RUNTIME_PLATFORM.HYBRID; break; } boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG; if (rtplatform == RUNTIME_PLATFORM.SPARK) DMLScript.USE_LOCAL_SPARK_CONFIG = true; try { String TEST_NAME = testname; TestConfiguration config = getTestConfiguration(TEST_NAME); /* This is for running the junit test the new way, i.e., construct the arguments directly */ String HOME = SCRIPT_DIR + TEST_DIR; fullDMLScriptName = HOME + TEST_NAME + ".dml"; programArgs = new String[] { "-explain", // "hops", "-config=" + HOME + TEST_CONF, "-args", HOME + INPUT_DIR + "X", HOME + INPUT_DIR + "y", String.valueOf(intercept), String.valueOf(epsilon), String.valueOf(maxiter), HOME + OUTPUT_DIR + "w" }; fullRScriptName = HOME + TEST_NAME + ".R"; rCmd = "Rscript" + " " + fullRScriptName + " " + HOME + INPUT_DIR + " " + String.valueOf(intercept) + " " + String.valueOf(epsilon) + " " + String.valueOf(maxiter) + " " + HOME + EXPECTED_DIR; loadTestConfiguration(config); // generate actual datasets double[][] X = getRandomMatrix(rows, cols, 0, 1, sparse ? sparsity2 : sparsity1, 7); writeInputMatrixWithMTD("X", X, true); double[][] y = getRandomMatrix(rows, 1, 0, 10, 1.0, 3); writeInputMatrixWithMTD("y", y, true); runTest(true, false, null, -1); runRScript(true); // compare matrices HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("w"); HashMap<CellIndex, Double> rfile = readRMatrixFromFS("w"); TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R"); } finally { rtplatform = platformOld; DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld; } }