/** * Calculates the size of each parition when the proportional partitioning scheme is used. * * @param dims represent the logical input space * @param blockSize the size, in bytes, of the blocks used to store the file that contains the * data being partitioned * @param numBlocks the number of blocks to use for generating the per-partition size * @param fileLen the length of the file, in bytes * @param dataTypeSize the size, in bytes, of a single cell for the given data type stored in the * file for which partitions are being generated * @param conf Configuration object for this current MR program * @return an int array that is the same length as dims, where each element is the length, in * cells, that the step shape is in the given dimension */ private int[] calcStepShape( int[] dims, long blockSize, long numBlocks, long fileLen, int dataTypeSize, Configuration conf) { int[] stepShape = new int[dims.length]; // sort out the max space for (int i = 0; i < dims.length; i++) { stepShape[i] = dims[i]; } stepShape[0] = Math.round(Math.max(1, stepShape[0] / numBlocks)); // if holistic functions are turned on, we need to make // sure this encompasses enough records // Also need to ensure that it ends up a // being a multiple of the zero-dimension of extraction shape if (Utils.queryDependantEnabled(conf)) { int numExShapesInStep = Math.max((stepShape[0] / Utils.getExtractionShape(conf, stepShape.length)[0]), 1); stepShape[0] = numExShapesInStep * Utils.getExtractionShape(conf, stepShape.length)[0]; } return stepShape; }
/** * Partitions the data represented by dims into groups of records where records are whole * subarrays with size 1 on the zero-th dimension. This may not work for all formats, revisit this * later TODO * * @param dims List of Dimension objects representing the dimensions of the input data that we are * generating partitions for * @param varName Name of the variable we are generating partitions for * @param fileName name of the file that contains the variable we are generating partitions for * @param partMode the partitioning mode being used to generate the partitions * @param startOffset the logical offset in the input data to start creating partitions at * @param conf Configuration object for this execution the given MR program * @return an array of ArraySpec objects that represent the partitions this function generated */ protected ArraySpec[] recordBasedPartition( int[] dims, String varName, String fileName, PartMode partMode, int[] startOffset, Configuration conf) throws IOException { int ndims = dims.length; int recDimLen = dims[0]; int[] recordShape = new int[ndims]; int[] recordCorner = new int[ndims]; ArrayList<ArraySpec> records = new ArrayList<ArraySpec>(recDimLen); for (int i = 0; i < ndims; i++) { recordShape[i] = dims[i]; recordCorner[i] = 0; } recordShape[0] = 1; if (Utils.queryDependantEnabled(conf)) { LOG.info("Query Dependant enabled"); recordShape[0] = Utils.getExtractionShape(conf, recordShape.length)[0]; } else { LOG.info("Query Dependant NOT enabled"); } ArraySpec tempSpec = null; for (int i = 0; i < recDimLen; i += recordShape[0]) { recordCorner[0] = i; // FIXME: this is clunky try { // if this is optC and the record is not valid, do not add it, if (Utils.noScanEnabled(conf)) { if (Utils.isValid(recordCorner, conf)) { tempSpec = new ArraySpec(recordCorner, recordShape, varName, fileName); tempSpec.setLogicalStartOffset(startOffset); records.add(tempSpec); // records.add(new ArraySpec(recordCorner, recordShape, varName, fileName)); } } else { // else wise do add it tempSpec = new ArraySpec(recordCorner, recordShape, varName, fileName); tempSpec.setLogicalStartOffset(startOffset); records.add(tempSpec); // records.add(new ArraySpec(recordCorner, recordShape, varName, fileName)); } } catch (Exception e) { throw new IOException(e); } } ArraySpec[] returnArray = new ArraySpec[records.size()]; returnArray = records.toArray(returnArray); return returnArray; }