/** {@inheritDoc} */ @Override protected void configureJob(Job job) throws IOException { final Configuration conf = job.getConfiguration(); // Construct the producer instance. if (null == mProducerClass) { throw new JobConfigurationException("Must specify a producer."); } mProducer = ReflectionUtils.newInstance(mProducerClass, job.getConfiguration()); mDataRequest = mProducer.getDataRequest(); // Serialize the producer class name into the job configuration. conf.setClass(KijiProducer.CONF_PRODUCER_CLASS, mProducerClass, KijiProducer.class); // Configure the mapper and reducer to use. Preconditions.checkState(getJobOutput() instanceof KijiTableMapReduceJobOutput); // Write to the table, but make sure the output table is the same as the input table. if (!getInputTable().equals(((KijiTableMapReduceJobOutput) getJobOutput()).getTable())) { throw new JobConfigurationException("Output table must be the same as the input table."); } // Producers should output to HFiles. mMapper = new ProduceMapper(); mReducer = new IdentityReducer<Object, Object>(); // Set the compression type for the generated HFiles to match the target locality group. final String outputColumn = mProducer.getOutputColumn(); final LocalityGroupLayout targetLocalityGroup = getTableLayout() .getFamilyMap() .get(new KijiColumnName(outputColumn).getFamily()) .getLocalityGroup(); conf.set( KijiHFileOutputFormat.CONF_HFILE_COMPRESSION, targetLocalityGroup.getDesc().getCompressionType().name().toLowerCase(Locale.US)); job.setJobName("Kiji produce: " + mProducerClass.getSimpleName()); // Configure the table input job. super.configureJob(job); }
/** * Constructor. * * @param context Task attempt context. * @param lgLayout Layout of the locality group. * @throws IOException on I/O error. */ public LocalityGroupRecordWriter(TaskAttemptContext context, LocalityGroupLayout lgLayout) throws IOException { mLGLayout = Preconditions.checkNotNull(lgLayout); mFamily = lgLayout.getId().toString(); // These parameters might be specific to each locality group: mMaxFileSizeBytes = mConf.getLong(CONF_HREGION_MAX_FILESIZE, DEFAULT_HREGION_MAX_FILESIZE); mBlockSizeBytes = mConf.getInt(CONF_HFILE_BLOCKSIZE, DEFAULT_HFILE_BLOCKSIZE); mFamilyDir = new Path(mOutputDir, mFamily); if (!mFileSystem.exists(mFamilyDir)) { if (!mFileSystem.mkdirs(mFamilyDir)) { throw new IOException( String.format("Unable to create output directory: %s", mFamilyDir)); } } mCompressionType = Compression.getCompressionAlgorithmByName( mLGLayout.getDesc().getCompressionType().toString().toLowerCase(Locale.ROOT)); mWriter = openNewWriter(); }
/** * Validates a new table layout against a reference layout for mutual compatibility. * * @param reference the reference layout against which to validate. * @param layout the new layout to validate. * @throws IOException in case of an IO Error reading from the schema table. Throws * InvalidLayoutException if the layouts are incompatible. */ public void validate(KijiTableLayout reference, KijiTableLayout layout) throws IOException { final ProtocolVersion layoutVersion = ProtocolVersion.parse(layout.getDesc().getVersion()); if (layoutVersion.compareTo(Versions.LAYOUT_VALIDATION_VERSION) < 0) { // Layout versions older than layout-1.3.0 do not require validation return; } // Accumulator for error messages which will be used to create an exception if errors occur. final List<String> incompatabilityMessages = Lists.newArrayList(); // Iterate through all families/columns in the new layout, // find a potential matching reference family/column, // and validate the reader/writer schema sets. // If no matching family/column exists in the reference layout the newly create column is valid. for (FamilyLayout flayout : layout.getFamilies()) { final ColumnId lgid = flayout.getLocalityGroup().getId(); LocalityGroupLayout refLGLayout = null; if (reference != null) { // If there is a reference layout, check for a locality group matching the ID of the LG for // this family. Locality Group IDs should not change between layouts. final String refLGName = reference.getLocalityGroupIdNameMap().get(lgid); if (refLGName != null) { // If there is a matching reference LG get its layout by name. refLGLayout = reference.getLocalityGroupMap().get(refLGName); } } // The ColumnId of the FamilyLayout from the table layout. Also matches the FamilyLayout for // this family in the reference layout if present. final ColumnId familyId = flayout.getId(); if (flayout.isMapType()) { // If the family is map-type, get the CellSchema for all values in the family. final CellSchema cellSchema = flayout.getDesc().getMapSchema(); FamilyLayout refFamilyLayout = null; if (refLGLayout != null) { // If there is a matching reference LG, check for the existence of this family. final String refFamilyName = refLGLayout.getFamilyIdNameMap().get(familyId); if (refFamilyName != null) { refFamilyLayout = refLGLayout.getFamilyMap().get(refFamilyName); } } if (refFamilyLayout != null) { if (refFamilyLayout.isMapType()) { // If the FamilyLayout from both table layouts are map type, compare their CellSchemas. final CellSchema refCellSchema = refFamilyLayout.getDesc().getMapSchema(); incompatabilityMessages.addAll( addColumnNamestoIncompatibilityMessages( flayout.getName(), null, validateCellSchema(refCellSchema, cellSchema))); } else if (refFamilyLayout.isGroupType()) { // If the FamilyLayout changed from group-type to map-type between table layout versions // that is an incompatible change. incompatabilityMessages.add( String.format( "Family: %s changed from group-type to map-type.", refFamilyLayout.getName())); } else { throw new InternalKijiError( String.format( "Family: %s is neither map-type nor group-type.", refFamilyLayout.getName())); } } else { // If the reference FamilyLayout is null this indicates a new family, which is inherently // compatible, but we still have to validate that the new readers and writers are // internally compatible. incompatabilityMessages.addAll( addColumnNamestoIncompatibilityMessages( flayout.getName(), null, validateCellSchema(null, cellSchema))); } } else if (flayout.isGroupType()) { // Check for a matching family from the reference layout. FamilyLayout refFamilyLayout = null; if (refLGLayout != null) { final String refFamilyName = refLGLayout.getFamilyIdNameMap().get(familyId); if (refFamilyName != null) { refFamilyLayout = refLGLayout.getFamilyMap().get(refFamilyName); } } if (refFamilyLayout != null) { if (refFamilyLayout.isGroupType()) { // If there is a matching reference family and it is the same family type, iterate // through the columns checking schema compatibility. Only checks columns from the new // layout because removed columns are inherently valid. for (ColumnLayout columnLayout : flayout.getColumns()) { final CellSchema cellSchema = columnLayout.getDesc().getColumnSchema(); final String refColumnName = refFamilyLayout.getColumnIdNameMap().get(columnLayout.getId()); ColumnLayout refColumnLayout = null; if (refColumnName != null) { // If there is a column from the reference layout with the same column ID, get its // layout. refColumnLayout = refFamilyLayout.getColumnMap().get(refColumnName); } // If there is a column from the reference layout with the same column ID, get its // CellSchema. final CellSchema refCellSchema = (refColumnLayout == null) ? null : refColumnLayout.getDesc().getColumnSchema(); // If there is no matching column, refCellSchema will be null and this will only test // that the new reader and writer schemas are internally compatible. incompatabilityMessages.addAll( addColumnNamestoIncompatibilityMessages( flayout.getName(), columnLayout.getName(), validateCellSchema(refCellSchema, cellSchema))); } } else if (refFamilyLayout.isMapType()) { // If the FamilyLayout changed from map-type to group-type between table layout versions // that is an incompatible change. incompatabilityMessages.add( String.format( "Family: %s changed from map-type to group-type.", refFamilyLayout.getName())); } else { throw new InternalKijiError( String.format( "Family: %s is neither map-type nor group-type.", refFamilyLayout.getName())); } } else { // If the reference FamilyLayout is null this indicates a new family, which is inherently // compatible, but we still have to validate that the new readers and writers are // internally compatible. for (ColumnLayout columnLayout : flayout.getColumns()) { final CellSchema cellSchema = columnLayout.getDesc().getColumnSchema(); incompatabilityMessages.addAll( addColumnNamestoIncompatibilityMessages( flayout.getName(), columnLayout.getName(), validateCellSchema(null, cellSchema))); } } } else { throw new InternalKijiError( String.format("Family: %s is neither map-type nor group-type.", flayout.getName())); } } // If there were any incompatibility errors, throw an exception. if (incompatabilityMessages.size() != 0) { throw new InvalidLayoutSchemaException(incompatabilityMessages); } }