/* * The constructor establishes the RuntimeContract between * the SQL-MR function and Aster Database. During query planning, * the function will constructed on a single node. During * query execution, it will be constructed and run on one or more nodes. */ public ChiSquareTest(RuntimeContract contract) { // Read argument clauses into appropriate member variables. observedArgumentIdx = contract .getInputInfo() .getColumnIndex(contract.useArgumentClause("observed").getSingleValue()); expectedArgumentIdx = contract .getInputInfo() .getColumnIndex(contract.useArgumentClause("expected").getSingleValue()); accumulator = new Accumulator(contract); errorHandler = new ErrorHandler( this.getClass().getName(), Utils.getSingleBooleanFromParameter(contract, "stoponerror", false), Utils.getSingleIntFromParameter(contract, "log", Integer.MAX_VALUE)); // Initialize the service chiSquareTest = new org.apache.commons.math3.stat.inference.ChiSquareTest(); // Verify that the function accepts the given input schema. if (!SqlType.integer().equals(contract.getInputInfo().getColumnType(observedArgumentIdx))) { throw new IllegalUsageException( "\"" + contract.useArgumentClause("observed").getSingleValue() + "\" must be of type 'integer'"); } if (!SqlType.doublePrecision() .equals(contract.getInputInfo().getColumnType(expectedArgumentIdx))) { throw new IllegalUsageException( "\"" + contract.useArgumentClause("expected").getSingleValue() + "\" must be a numeric type"); } // Construct the output schema List<ColumnDefinition> outputColumns = new ArrayList<ColumnDefinition>(); accumulator.constructOutputSchema(contract, outputColumns); outputColumns.add(new ColumnDefinition("pValue", SqlType.getType("double"))); contract.setOutputInfo(new OutputInfo(outputColumns)); // Complete the contract contract.complete(); }
public void operateOnPartition( PartitionDefinition definition, RowIterator inputIterator, RowEmitter outputEmitter) { errorHandler.enterOperateOnPartition(definition, inputIterator, outputEmitter); try { // Collect input rows for observed and expected values ArrayList<Double> expectedList = new ArrayList<Double>(); ArrayList<Long> observedList = new ArrayList<Long>(); while (inputIterator.advanceToNextRow()) { errorHandler.enterOperateOnRow(inputIterator, outputEmitter); if (inputIterator.isNullAt(observedArgumentIdx) || inputIterator.isNullAt(expectedArgumentIdx)) throw new IllegalArgumentException("observed and expected values cannot be null"); expectedList.add(inputIterator.getDoubleAt(expectedArgumentIdx)); observedList.add(inputIterator.getLongAt(observedArgumentIdx)); errorHandler.exitOperateOnRow(); } double[] expected = new double[expectedList.size()]; for (int i = 0; i < expected.length; i++) expected[i] = expectedList.get(i); long[] observed = new long[observedList.size()]; for (int i = 0; i < observed.length; i++) observed[i] = observedList.get(i); // Run test double pValue = chiSquareTest.chiSquareTest(expected, observed); // Emit result accumulator.emit(inputIterator, outputEmitter); outputEmitter.addDouble(pValue); outputEmitter.emitRow(); } catch (IllegalArgumentException e) { errorHandler.catchException(e); return; // End this partition and go to next if stopOnError is set to false (otherwise // exception is thrown) } errorHandler.exitOperateOnPartition(); }