/**
 * Checks that the options filled in by {@code jjJobRunner} include the distance function class
 * and the partitioning parameters (max member selection, partition distance, partitioner class).
 */
@Test
public void testOptions() {
  final Set<Option> filledOptions = new HashSet<Option>();
  jjJobRunner.fillOptions(filledOptions);

  assertTrue(
      PropertyManagement.hasOption(
          filledOptions, CommonParameters.Common.DISTANCE_FUNCTION_CLASS));
  assertTrue(PropertyManagement.hasOption(filledOptions, Partition.MAX_MEMBER_SELECTION));
  assertTrue(PropertyManagement.hasOption(filledOptions, Partition.PARTITION_DISTANCE));
  assertTrue(PropertyManagement.hasOption(filledOptions, Partition.PARTITIONER_CLASS));

  /*
   * Open question: should the checks below be part of this test? When options are requested,
   * the runner does not know the selected partition algorithm.
   *
   * assertTrue(PropertyManagement.hasOption(filledOptions, GlobalParameters.Global.CRS_ID));
   *
   * assertTrue(PropertyManagement.hasOption(
   *     filledOptions, ExtractParameters.Extract.DIMENSION_EXTRACT_CLASS));
   *
   * assertTrue(PropertyManagement.hasOption(
   *     filledOptions, ClusteringParameters.Clustering.GEOMETRIC_DISTANCE_UNIT));
   *
   * assertTrue(PropertyManagement.hasOption(
   *     filledOptions, CommonParameters.Common.INDEX_MODEL_BUILDER_CLASS));
   *
   * assertTrue(PropertyManagement.hasOption(
   *     filledOptions, ClusteringParameters.Clustering.DISTANCE_THRESHOLDS));
   */
}
/**
 * Delegates to one of two runners depending on the requested sample size.
 *
 * <p>The sample size is read from {@code Sample.SAMPLE_SIZE} (default 1) and compared with
 * {@code Jump.KPLUSPLUS_MIN} (default 3). Sizes below that minimum go to {@code
 * singleSamplekmeansJobRunner}; all others go to {@code parallelJobRunner}.
 *
 * @param config the configuration passed through to the selected runner
 * @param runTimeProperties the properties consulted for the two values and passed through
 * @return the status returned by the selected runner
 * @throws Exception if the selected runner throws
 */
@Override
public int run(final Configuration config, final PropertyManagement runTimeProperties)
    throws Exception {
  final int sampleSize =
      runTimeProperties.getPropertyAsInt(SampleParameters.Sample.SAMPLE_SIZE, 1);
  final int parallelMinimum =
      runTimeProperties.getPropertyAsInt(JumpParameters.Jump.KPLUSPLUS_MIN, 3);

  if (sampleSize < parallelMinimum) {
    return singleSamplekmeansJobRunner.run(config, runTimeProperties);
  }
  return parallelJobRunner.run(config, runTimeProperties);
}
/**
 * Looks up the value of the {@code parent} parameter in the given property set.
 *
 * @param propertyManagement the property set to read from
 * @return the stored value, or {@code null} if the lookup throws (the failure is logged)
 */
@Override
public Object getValue(final PropertyManagement propertyManagement) {
  Object result = null;
  try {
    result = propertyManagement.getProperty(parent);
  } catch (final Exception e) {
    // Lookup failures are reported but not propagated; the caller sees null instead.
    LOGGER.error("Unable to deserialize property '" + parent.toString() + "'", e);
  }
  return result;
}
/**
 * Adds an {@code Option} to {@code options} for each of the supported {@code Common} parameters
 * that appears in {@code params}.
 *
 * @param options the set receiving the new options
 * @param params the parameters the caller is interested in
 */
public static final void fillOptions(Set<Option> options, Common[] params) {
  addOptionIfRequested(
      options,
      params,
      Common.DIMENSION_EXTRACT_CLASS,
      "dde",
      "Dimension Extractor Class implements mil.nga.giat.geowave.analytics.extract.DimensionExtractor");
  addOptionIfRequested(
      options,
      params,
      Common.ADAPTER_STORE_FACTORY,
      "caf",
      "Adapter Store factory implements mil.nga.giat.geowave.analytics.tools.dbops.AdapterStoreFactory");
  addOptionIfRequested(
      options,
      params,
      Common.INDEX_STORE_FACTORY,
      "cif",
      "Index Store factory implements mil.nga.giat.geowave.analytics.tools.dbops.IndexStoreFactory");
  addOptionIfRequested(
      options,
      params,
      Common.DISTANCE_FUNCTION_CLASS,
      "cdf",
      "Distance Function Class implements mil.nga.giat.geowave.analytics.distance.DistanceFn");
  addOptionIfRequested(
      options,
      params,
      Common.INDEX_MODEL_BUILDER_CLASS,
      "cim",
      "Class implements mil.nga.giat.geowave.analytics.tools.model.IndexModelBuilder",
      true);
}

/** Adds an option for {@code param} flagged {@code true}, if {@code params} contains it. */
private static void addOptionIfRequested(
    final Set<Option> options,
    final Common[] params,
    final Common param,
    final String shortName,
    final String description) {
  addOptionIfRequested(options, params, param, shortName, description, true);
}

/** Adds an option for {@code param} with the given flag, if {@code params} contains it. */
private static void addOptionIfRequested(
    final Set<Option> options,
    final Common[] params,
    final Common param,
    final String shortName,
    final String description,
    final boolean flag) {
  if (contains(params, param)) {
    options.add(PropertyManagement.newOption(param, shortName, description, flag));
  }
}
/**
 * Stores {@code value} under the {@code parent} parameter.
 *
 * <p>When this converter is flagged as a class property and the value is a {@code String}, the
 * string is first resolved to a {@code Class} via {@link Class#forName(String)}. If that fails,
 * the error is logged and the original string is stored unchanged.
 *
 * @param propertyManagement the property set to write to
 * @param value the value to store
 */
@Override
public void setValue(final PropertyManagement propertyManagement, final Object value) {
  final boolean resolveAsClass = this.isClass && value instanceof String;

  Object storeValue = value;
  if (resolveAsClass) {
    final String className = value.toString();
    try {
      storeValue = Class.forName(className);
    } catch (ClassNotFoundException e) {
      // Best effort: keep the raw string when the class cannot be loaded.
      LOGGER.error("Class " + className + " for property " + parent + " is not found", e);
    }
  }

  propertyManagement.store(parent, storeValue);
}
@Override @SuppressWarnings("unchecked") public int run(final Configuration configuration, final PropertyManagement propertyManagement) throws Exception { propertyManagement.store(CentroidParameters.Centroid.ZOOM_LEVEL, currentZoomLevel); propertyManagement.storeIfEmpty(GlobalParameters.Global.BATCH_ID, UUID.randomUUID().toString()); propertyManagement.storeIfEmpty( CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS, SimpleFeatureItemWrapperFactory.class); propertyManagement.storeIfEmpty( CommonParameters.Common.DISTANCE_FUNCTION_CLASS, FeatureCentroidDistanceFn.class); propertyManagement.storeIfEmpty( CentroidParameters.Centroid.EXTRACTOR_CLASS, SimpleFeatureCentroidExtractor.class); propertyManagement.storeIfEmpty( CommonParameters.Common.DIMENSION_EXTRACT_CLASS, SimpleFeatureGeometryExtractor.class); propertyManagement.copy( CentroidParameters.Centroid.DATA_TYPE_ID, SampleParameters.Sample.DATA_TYPE_ID); propertyManagement.copy(CentroidParameters.Centroid.INDEX_ID, SampleParameters.Sample.INDEX_ID); ClusteringUtils.createAdapter(propertyManagement); ClusteringUtils.createIndex(propertyManagement); final String currentBatchId = propertyManagement.getPropertyAsString( GlobalParameters.Global.BATCH_ID, UUID.randomUUID().toString()); try { final NumericRange rangeOfIterations = propertyManagement.getPropertyAsRange( JumpParameters.Jump.RANGE_OF_CENTROIDS, new NumericRange(2, 200)); propertyManagement.store(GlobalParameters.Global.PARENT_BATCH_ID, currentBatchId); final GenericStoreCommandLineOptions<DataStore> dataStoreOptions = ((PersistableDataStore) propertyManagement.getProperty(StoreParam.DATA_STORE)) .getCliOptions(); final GenericStoreCommandLineOptions<IndexStore> indexStoreOptions = ((PersistableIndexStore) propertyManagement.getProperty(StoreParam.INDEX_STORE)) .getCliOptions(); final GenericStoreCommandLineOptions<AdapterStore> adapterStoreOptions = ((PersistableAdapterStore) propertyManagement.getProperty(StoreParam.ADAPTER_STORE)) .getCliOptions(); final 
DistortionGroupManagement distortionGroupManagement = new DistortionGroupManagement( dataStoreOptions.createStore(), indexStoreOptions.createStore(), adapterStoreOptions.createStore()); for (int k = (int) Math.max(2, Math.round(rangeOfIterations.getMin())); k < Math.round(rangeOfIterations.getMax()); k++) { // regardless of the algorithm, the sample set is fixed in size propertyManagement.store(SampleParameters.Sample.MIN_SAMPLE_SIZE, k); propertyManagement.store(SampleParameters.Sample.MAX_SAMPLE_SIZE, k); propertyManagement.store(SampleParameters.Sample.SAMPLE_SIZE, k); jumpRunner.setCentroidsCount(k); jumpRunner.setDataStoreOptions(dataStoreOptions); final String iterationBatchId = currentBatchId + "_" + k; propertyManagement.store(GlobalParameters.Global.BATCH_ID, iterationBatchId); jumpRunner.setReducerCount(k); LOGGER.info("KMeans for k: " + k + " and batch " + currentBatchId); final int status = super.run(configuration, propertyManagement); if (status != 0) { return status; } } propertyManagement.store(GlobalParameters.Global.BATCH_ID, currentBatchId); @SuppressWarnings("rawtypes") final Class<AnalyticItemWrapperFactory> analyticItemWrapperFC = propertyManagement.getPropertyAsClass( CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS, AnalyticItemWrapperFactory.class); /** * Associate the batch id with the best set of groups so the caller can find the clusters for * the given batch */ final int result = distortionGroupManagement.retainBestGroups( (AnalyticItemWrapperFactory<SimpleFeature>) analyticItemWrapperFC.newInstance(), propertyManagement.getPropertyAsString(CentroidParameters.Centroid.DATA_TYPE_ID), propertyManagement.getPropertyAsString(CentroidParameters.Centroid.INDEX_ID), currentBatchId, currentZoomLevel); // distortionGroupManagement.cleanUp(); return result; } catch (final Exception ex) { LOGGER.error("Cannot create distortions", ex); return 1; } }
@Override public int run(final Configuration config, final PropertyManagement runTimeProperties) throws Exception { runTimeProperties.storeIfEmpty(GlobalParameters.Global.BATCH_ID, UUID.randomUUID().toString()); final FileSystem fs = FileSystem.get(config); final String outputBaseDir = runTimeProperties.getPropertyAsString(MapReduceParameters.MRConfig.HDFS_BASE_DIR, "/tmp"); Path startPath = new Path( outputBaseDir + "/" + runTimeProperties.getPropertyAsString(GlobalParameters.Global.ACCUMULO_NAMESPACE) + "_level_0"); if (fs.exists(startPath)) { fs.delete(startPath, true); } AbstractPartitioner.putDistances( runTimeProperties, new double[] {runTimeProperties.getPropertyAsDouble(Partition.PARTITION_DISTANCE, 10)}); jobRunner.setInputFormatConfiguration(inputFormatConfiguration); jobRunner.setOutputFormatConfiguration(new SequenceFileOutputFormatConfiguration(startPath)); LOGGER.info( "Running with partition distance {}", runTimeProperties.getPropertyAsDouble(Partition.PARTITION_DISTANCE, 10.0)); final int initialStatus = jobRunner.run(config, runTimeProperties); if (initialStatus != 0) { return initialStatus; } int maxIterationCount = runTimeProperties.getPropertyAsInt(ClusteringParameters.Clustering.MAX_ITERATIONS, 15); int iteration = 2; long lastRecordCount = 0; double precisionFactor = 0.9; while (maxIterationCount > 0) { // context does not mater in this case try { final Partitioner<?> partitioner = runTimeProperties.getClassInstance( PartitionParameters.Partition.PARTITIONER_CLASS, Partitioner.class, OrthodromicDistancePartitioner.class); partitioner.initialize(runTimeProperties); } catch (final IllegalArgumentException argEx) { // this occurs if the partitioner decides that the distance is // invalid (e.g. bigger than the map space). // In this case, we just exist out of the loop. 
// startPath has the final data break; } catch (final Exception e1) { throw new IOException(e1); } final PropertyManagement localScopeProperties = new PropertyManagement(runTimeProperties); localScopeProperties.store(Partition.PARTITION_PRECISION, precisionFactor); jobRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(startPath)); jobRunner.setFirstIteration(false); localScopeProperties.store(HullParameters.Hull.ZOOM_LEVEL, zoomLevel); localScopeProperties.store(HullParameters.Hull.ITERATION, iteration); localScopeProperties.storeIfEmpty( OutputParameters.Output.DATA_TYPE_ID, localScopeProperties.getPropertyAsString( HullParameters.Hull.DATA_TYPE_ID, "concave_hull")); // Set to zero to force each cluster to be moved into the next // iteration // even if no merge occurs localScopeProperties.store(ClusteringParameters.Clustering.MINIMUM_SIZE, 0); final Path nextPath = new Path( outputBaseDir + "/" + runTimeProperties.getPropertyAsString( GlobalParameters.Global.ACCUMULO_NAMESPACE) + "_level_" + iteration); if (fs.exists(nextPath)) { fs.delete(nextPath, true); } jobRunner.setOutputFormatConfiguration(new SequenceFileOutputFormatConfiguration(nextPath)); final int status = jobRunner.run(config, localScopeProperties); if (status != 0) { return status; } final long currentOutputCount = jobRunner.getCounterValue(TaskCounter.REDUCE_OUTPUT_RECORDS); if (currentOutputCount == lastRecordCount) { maxIterationCount = 0; } lastRecordCount = currentOutputCount; startPath = nextPath; maxIterationCount--; precisionFactor -= 0.1; iteration++; } final PropertyManagement localScopeProperties = new PropertyManagement(runTimeProperties); localScopeProperties.storeIfEmpty( OutputParameters.Output.DATA_TYPE_ID, localScopeProperties.getPropertyAsString(HullParameters.Hull.DATA_TYPE_ID, "concave_hull")); localScopeProperties.storeIfEmpty( OutputParameters.Output.DATA_NAMESPACE_URI, localScopeProperties.getPropertyAsString( HullParameters.Hull.DATA_NAMESPACE_URI, 
BasicFeatureTypes.DEFAULT_NAMESPACE)); localScopeProperties.storeIfEmpty( OutputParameters.Output.INDEX_ID, localScopeProperties.get(HullParameters.Hull.INDEX_ID)); inputLoadRunner.setInputFormatConfiguration( new SequenceFileInputFormatConfiguration(startPath)); inputLoadRunner.run(config, runTimeProperties); return 0; }
/**
 * Prepares {@code jjJobRunner} and {@code runTimeProperties} for each test.
 *
 * <p>A stub {@code MapReduceIntegration} is installed whose {@code submit} invokes {@code
 * tool.run} directly and whose {@code waitForCompletion} asserts on the job's configuration
 * (input format, reducer count, input directory, partitioning and distance function settings)
 * before returning empty counters. The runner is then given a sequence-file input at {@code
 * file://foo/bin} and 10 reducers, and the runtime properties are populated with the values
 * those assertions expect.
 */
@Before
public void init() {
  final MapReduceIntegration stubIntegration =
      new MapReduceIntegration() {
        @Override
        public int submit(
            final Configuration configuration,
            final PropertyManagement runTimeProperties,
            final GeoWaveAnalyticJobRunner tool)
            throws Exception {
          tool.setConf(configuration);
          FeatureDataAdapterStoreFactory.transferState(configuration, runTimeProperties);
          final int toolStatus = tool.run(runTimeProperties.toGeoWaveRunnerArguments());
          return toolStatus;
        }

        @Override
        public Counters waitForCompletion(final Job job)
            throws ClassNotFoundException, IOException, InterruptedException {
          Assert.assertEquals(SequenceFileInputFormat.class, job.getInputFormatClass());
          Assert.assertEquals(10, job.getNumReduceTasks());

          final JobContextConfigurationWrapper jobConfig =
              new JobContextConfigurationWrapper(job);
          Assert.assertEquals("file://foo/bin", job.getConfiguration().get("mapred.input.dir"));
          Assert.assertEquals(
              0.4,
              jobConfig.getDouble(Partition.PARTITION_DISTANCE, NNMapReduce.class, 0.0),
              0.001);
          Assert.assertEquals(
              100, jobConfig.getInt(Partition.MAX_MEMBER_SELECTION, NNMapReduce.class, 1));

          try {
            final Partitioner<?> primaryPartitioner =
                jobConfig.getInstance(
                    Partition.PARTITIONER_CLASS, NNMapReduce.class, Partitioner.class, null);
            Assert.assertEquals(
                OrthodromicDistancePartitioner.class, primaryPartitioner.getClass());

            final Partitioner<?> secondaryPartitioner =
                jobConfig.getInstance(
                    Partition.SECONDARY_PARTITIONER_CLASS,
                    NNMapReduce.class,
                    Partitioner.class,
                    null);
            Assert.assertEquals(
                OrthodromicDistancePartitioner.class, secondaryPartitioner.getClass());

            final DistanceFn<?> distanceFunction =
                jobConfig.getInstance(
                    CommonParameters.Common.DISTANCE_FUNCTION_CLASS,
                    NNMapReduce.class,
                    DistanceFn.class,
                    GeometryCentroidDistanceFn.class);
            Assert.assertEquals(FeatureCentroidDistanceFn.class, distanceFunction.getClass());
          } catch (final InstantiationException e) {
            throw new IOException("Unable to configure system", e);
          } catch (final IllegalAccessException e) {
            throw new IOException("Unable to configure system", e);
          }

          Assert.assertEquals(10, job.getNumReduceTasks());
          return new Counters();
        }

        @Override
        public Job getJob(final Tool tool) throws IOException {
          return new Job(tool.getConf());
        }
      };
  jjJobRunner.setMapReduceIntegrater(stubIntegration);

  // Runner settings checked by the stub's waitForCompletion.
  final Path inputPath = new Path("file://foo/bin");
  jjJobRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(inputPath));
  jjJobRunner.setReducerCount(10);

  // Connection-style properties.
  runTimeProperties.store(MRConfig.HDFS_BASE_DIR, "/");
  runTimeProperties.store(GlobalParameters.Global.ZOOKEEKER, "localhost:3000");
  runTimeProperties.store(GlobalParameters.Global.ACCUMULO_INSTANCE, "accumulo");
  runTimeProperties.store(GlobalParameters.Global.ACCUMULO_USER, "root");
  runTimeProperties.store(GlobalParameters.Global.ACCUMULO_PASSWORD, "pwd");
  runTimeProperties.store(GlobalParameters.Global.ACCUMULO_NAMESPACE, "test");

  // Distance function and partitioning properties checked by the stub.
  runTimeProperties.store(
      CommonParameters.Common.DISTANCE_FUNCTION_CLASS, FeatureCentroidDistanceFn.class);
  runTimeProperties.store(Partition.PARTITIONER_CLASS, OrthodromicDistancePartitioner.class);
  runTimeProperties.store(
      Partition.SECONDARY_PARTITIONER_CLASS, OrthodromicDistancePartitioner.class);
  runTimeProperties.store(Partition.PARTITION_DISTANCE, Double.valueOf(0.4));
  runTimeProperties.store(Partition.MAX_MEMBER_SELECTION, Integer.valueOf(100));
}