/** * Partitions the data represented by dims into groups of records where records are whole * subarrays with size 1 on the zero-th dimension. This may not work for all formats, revisit this * later TODO * * @param dims List of Dimension objects representing the dimensions of the input data that we are * generating partitions for * @param varName Name of the variable we are generating partitions for * @param fileName name of the file that contains the variable we are generating partitions for * @param partMode the partitioning mode being used to generate the partitions * @param startOffset the logical offset in the input data to start creating partitions at * @param conf Configuration object for this execution the given MR program * @return an array of ArraySpec objects that represent the partitions this function generated */ protected ArraySpec[] recordBasedPartition( int[] dims, String varName, String fileName, PartMode partMode, int[] startOffset, Configuration conf) throws IOException { int ndims = dims.length; int recDimLen = dims[0]; int[] recordShape = new int[ndims]; int[] recordCorner = new int[ndims]; ArrayList<ArraySpec> records = new ArrayList<ArraySpec>(recDimLen); for (int i = 0; i < ndims; i++) { recordShape[i] = dims[i]; recordCorner[i] = 0; } recordShape[0] = 1; if (Utils.queryDependantEnabled(conf)) { LOG.info("Query Dependant enabled"); recordShape[0] = Utils.getExtractionShape(conf, recordShape.length)[0]; } else { LOG.info("Query Dependant NOT enabled"); } ArraySpec tempSpec = null; for (int i = 0; i < recDimLen; i += recordShape[0]) { recordCorner[0] = i; // FIXME: this is clunky try { // if this is optC and the record is not valid, do not add it, if (Utils.noScanEnabled(conf)) { if (Utils.isValid(recordCorner, conf)) { tempSpec = new ArraySpec(recordCorner, recordShape, varName, fileName); tempSpec.setLogicalStartOffset(startOffset); records.add(tempSpec); // records.add(new ArraySpec(recordCorner, recordShape, varName, fileName)); } } else { // else wise do add it tempSpec = new ArraySpec(recordCorner, recordShape, varName, fileName); tempSpec.setLogicalStartOffset(startOffset); records.add(tempSpec); // records.add(new ArraySpec(recordCorner, recordShape, varName, fileName)); } } catch (Exception e) { throw new IOException(e); } } ArraySpec[] returnArray = new ArraySpec[records.size()]; returnArray = records.toArray(returnArray); return returnArray; }
/** * The partitioning scheme creates partitions distributes the data to be read (approximately) * evenly over all the blocks in the file. This is a very naive approach and should not be used * other than as a point of refernece. * * @param dims represent the logical input space * @param varName name of the variable that we're creating partitions for * @param blockSize the size, in bytes, of the blocks used to store the file that contains the * data being partitioned * @param numBlocks the number of blocks to use for generating the per-partition size * @param fileLen the length of the file, in bytes * @param dataTypeSize the size, in bytes, of a single cell for the given data type stored in the * file for which partitions are being generated * @param fileName name of the file that partitions are being generated for * @param startOffset the logical position in the file to begin generating partitions from * @param conf Configuration object for this current MR program * @return an array of ArraySpecs that is the same length as dims, where each ArraySpec * corresponds to a partition */ protected ArraySpec[] proportionalPartition( int[] dims, String varName, long blockSize, long numBlocks, long fileLen, int dataTypeSize, String fileName, int[] startOffset, Configuration conf) throws IOException { int ndims = dims.length; ArrayList<ArraySpec> records = new ArrayList<ArraySpec>(); // this next bit is fairly hard-coded and specific to our tests. // it represents a naive split that a human might come up with // sort out the step size int[] stepShape = calcStepShape(dims, blockSize, numBlocks, fileLen, dataTypeSize, conf); int[] tempCorner = new int[ndims]; int[] tempStep = new int[ndims]; // initialize the temporary step shape to be the first step for (int i = 0; i < ndims; i++) { tempStep[i] = stepShape[i]; tempCorner[i] = 0; } LOG.info("Calculated stepshape: " + Utils.arrayToString(stepShape)); ArraySpec tempSpec = new ArraySpec(); int stepSize = tempStep[0]; // create the actual splits while (tempCorner[0] < dims[0]) { try { if (Utils.noScanEnabled(conf)) { if (Utils.isValid(tempCorner, conf)) { tempSpec = new ArraySpec(tempCorner, tempStep, varName, fileName); tempSpec.setLogicalStartOffset(startOffset); records.add(tempSpec); } } else { tempSpec = new ArraySpec(tempCorner, tempStep, varName, fileName); tempSpec.setLogicalStartOffset(startOffset); records.add(tempSpec); } } catch (Exception e) { throw new IOException(e); } // update the corner tempCorner[0] += stepSize; // use MIN here to make sure we don't over run the constraining space stepSize = Math.min(tempStep[0], dims[0] - tempCorner[0]); // update the shape of the next write appropriately tempStep[0] = stepSize; } ArraySpec[] returnArray = new ArraySpec[records.size()]; returnArray = records.toArray(returnArray); return returnArray; }