/**
 * Initializes reducer state from the job configuration before any keys are reduced.
 *
 * <p>Reads the configured centroid count (kept only when positive), builds the centroid manager
 * for this job context, and resolves the batch id, falling back to the centroid manager's batch
 * id when no parent batch id is configured.
 *
 * @param context the reducer context supplying the job configuration
 * @throws IOException if the superclass setup fails or the centroid manager cannot be created
 * @throws InterruptedException if the superclass setup is interrupted
 */
@Override
protected void setup(
    final Reducer<Text, CountofDoubleWritable, GeoWaveOutputKey, DistortionEntry>.Context context)
    throws IOException, InterruptedException {
  super.setup(context);

  final ScopedJobConfiguration scopedConfig =
      new ScopedJobConfiguration(
          context.getConfiguration(),
          KMeansDistortionMapReduce.class,
          KMeansDistortionMapReduce.LOGGER);

  // A non-positive value (including the -1 default) leaves expectedK untouched.
  final int configuredCentroidCount =
      scopedConfig.getInt(JumpParameters.Jump.COUNT_OF_CENTROIDS, -1);
  if (configuredCentroidCount > 0) {
    expectedK = configuredCentroidCount;
  }

  try {
    centroidManager =
        new CentroidManagerGeoWave<Object>(
            context,
            KMeansDistortionMapReduce.class,
            KMeansDistortionMapReduce.LOGGER);
  } catch (final Exception e) {
    // Any construction failure is logged and surfaced as an IOException, keeping the cause.
    final String failureMessage = "Unable to initialize centroid manager";
    KMeansDistortionMapReduce.LOGGER.warn(failureMessage, e);
    throw new IOException(failureMessage, e);
  }

  // The centroid manager's own batch id serves as the default for the parent batch id.
  final String defaultBatchId = centroidManager.getBatchId();
  batchId = scopedConfig.getString(GlobalParameters.Global.PARENT_BATCH_ID, defaultBatchId);
}
@Override public void reduce( final Text key, final Iterable<CountofDoubleWritable> values, final Reducer<Text, CountofDoubleWritable, GeoWaveOutputKey, DistortionEntry>.Context context) throws IOException, InterruptedException { double expectation = 0.0; final List<AnalyticItemWrapper<Object>> centroids = centroidManager.getCentroidsForGroup(key.toString()); // it is possible that the number of items in a group are smaller // than the cluster final Integer kCount; if (expectedK == null) { kCount = centroids.size(); } else { kCount = expectedK; } if (centroids.size() == 0) { return; } final double numDimesions = 2 + centroids.get(0).getExtraDimensions().length; double ptCount = 0; for (final CountofDoubleWritable value : values) { expectation += value.getValue(); ptCount += value.getCount(); } if (ptCount > 0) { expectation /= ptCount; final Double distortion = Math.pow(expectation / numDimesions, -(numDimesions / 2)); final DistortionEntry entry = new DistortionEntry(key.toString(), batchId, kCount, distortion); context.write( new GeoWaveOutputKey( DistortionDataAdapter.ADAPTER_ID, DistortionGroupManagement.DISTORTIONS_INDEX_LIST), entry); } }