Example #1
  @Test
  public void testOptions() {
    final Set<Option> options = new HashSet<Option>();
    jjJobRunner.fillOptions(options);

    assertTrue(
        PropertyManagement.hasOption(options, CommonParameters.Common.DISTANCE_FUNCTION_CLASS));

    assertTrue(PropertyManagement.hasOption(options, Partition.MAX_MEMBER_SELECTION));

    assertTrue(PropertyManagement.hasOption(options, Partition.PARTITION_DISTANCE));

    assertTrue(PropertyManagement.hasOption(options, Partition.PARTITIONER_CLASS));

    /*
     *
     * Should this be part of the test? When options are requested, the
     * runner does not know the selected partition algorithm.
     *
     * assertTrue(PropertyManagement.hasOption( options,
     * GlobalParameters.Global.CRS_ID));
     *
     * assertTrue(PropertyManagement.hasOption( options,
     * ExtractParameters.Extract.DIMENSION_EXTRACT_CLASS));
     *
     * assertTrue(PropertyManagement.hasOption( options,
     * ClusteringParameters.Clustering.GEOMETRIC_DISTANCE_UNIT));
     *
     * assertTrue(PropertyManagement.hasOption( options,
     * CommonParameters.Common.INDEX_MODEL_BUILDER_CLASS));
     *
     * assertTrue(PropertyManagement.hasOption( options,
     * ClusteringParameters.Clustering.DISTANCE_THRESHOLDS));
     */
  }
Example #2
 @Override
 public int run(final Configuration config, final PropertyManagement runTimeProperties)
     throws Exception {
   final int k = runTimeProperties.getPropertyAsInt(SampleParameters.Sample.SAMPLE_SIZE, 1);
   final int minkplusplus =
       runTimeProperties.getPropertyAsInt(JumpParameters.Jump.KPLUSPLUS_MIN, 3);
   if (k >= minkplusplus) {
     return parallelJobRunner.run(config, runTimeProperties);
   } else {
     return singleSamplekmeansJobRunner.run(config, runTimeProperties);
   }
 }
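The runner above picks between its two delegates based on the configured sample size: if k reaches the k-means++ minimum it runs parallelJobRunner, otherwise singleSamplekmeansJobRunner. Below is a minimal sketch of the property setup that drives this branch; the no-argument PropertyManagement constructor and the literal values are illustrative assumptions, not taken from the examples on this page.

  // Illustrative sketch only: store the two properties read by run(...) above.
  // With SAMPLE_SIZE (k = 2) below KPLUSPLUS_MIN (3), run(...) would take the
  // singleSamplekmeansJobRunner branch; a SAMPLE_SIZE of 3 or more would select
  // parallelJobRunner instead.
  final PropertyManagement runTimeProperties = new PropertyManagement();
  runTimeProperties.store(SampleParameters.Sample.SAMPLE_SIZE, 2);
  runTimeProperties.store(JumpParameters.Jump.KPLUSPLUS_MIN, 3);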
Example #3
 @Override
 public Object getValue(final PropertyManagement propertyManagement) {
   try {
     return propertyManagement.getProperty(parent);
   } catch (final Exception e) {
     LOGGER.error("Unable to deserialize property '" + parent.toString() + "'", e);
     return null;
   }
 }
Example #4
 public static final void fillOptions(Set<Option> options, Common[] params) {
   if (contains(params, Common.DIMENSION_EXTRACT_CLASS)) {
     options.add(
         PropertyManagement.newOption(
             Common.DIMENSION_EXTRACT_CLASS,
             "dde",
             "Dimension Extractor Class implements mil.nga.giat.geowave.analytics.extract.DimensionExtractor",
             true));
   }
   if (contains(params, Common.ADAPTER_STORE_FACTORY)) {
     options.add(
         PropertyManagement.newOption(
             Common.ADAPTER_STORE_FACTORY,
             "caf",
             "Adapter Store factory implements mil.nga.giat.geowave.analytics.tools.dbops.AdapterStoreFactory",
             true));
   }
   if (contains(params, Common.INDEX_STORE_FACTORY)) {
     options.add(
         PropertyManagement.newOption(
             Common.INDEX_STORE_FACTORY,
             "cif",
             "Index Store factory implements mil.nga.giat.geowave.analytics.tools.dbops.IndexStoreFactory",
             true));
   }
   if (contains(params, Common.DISTANCE_FUNCTION_CLASS)) {
     options.add(
         PropertyManagement.newOption(
             Common.DISTANCE_FUNCTION_CLASS,
             "cdf",
             "Distance Function Class implements mil.nga.giat.geowave.analytics.distance.DistanceFn",
             true));
   }
   if (contains(params, Common.INDEX_MODEL_BUILDER_CLASS)) {
     options.add(
         PropertyManagement.newOption(
             Common.INDEX_MODEL_BUILDER_CLASS,
             "cim",
             "Class implements mil.nga.giat.geowave.analytics.tools.model.IndexModelBuilder",
             true));
   }
 }
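fillOptions above guards each option behind a contains(params, ...) check that is not part of the snippet. A minimal sketch of such a helper follows, assuming a plain membership test over the Common[] array; this is a hypothetical reconstruction, not copied from the source.

  // Hypothetical helper sketch: linear scan for the requested parameter.
  private static boolean contains(final Common[] params, final Common option) {
    for (final Common param : params) {
      if (param == option) {
        return true;
      }
    }
    return false;
  }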
Example #5
 @Override
 public void setValue(final PropertyManagement propertyManagement, final Object value) {
   Object storeValue = value;
   if (this.isClass && value instanceof String) {
     try {
       storeValue = Class.forName(value.toString());
     } catch (ClassNotFoundException e) {
       LOGGER.error("Class " + value.toString() + " for property " + parent + " is not found", e);
     }
   }
   propertyManagement.store(parent, storeValue);
 }
Example #6
  @Override
  @SuppressWarnings("unchecked")
  public int run(final Configuration configuration, final PropertyManagement propertyManagement)
      throws Exception {

    propertyManagement.store(CentroidParameters.Centroid.ZOOM_LEVEL, currentZoomLevel);

    propertyManagement.storeIfEmpty(GlobalParameters.Global.BATCH_ID, UUID.randomUUID().toString());

    propertyManagement.storeIfEmpty(
        CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS, SimpleFeatureItemWrapperFactory.class);
    propertyManagement.storeIfEmpty(
        CommonParameters.Common.DISTANCE_FUNCTION_CLASS, FeatureCentroidDistanceFn.class);
    propertyManagement.storeIfEmpty(
        CentroidParameters.Centroid.EXTRACTOR_CLASS, SimpleFeatureCentroidExtractor.class);
    propertyManagement.storeIfEmpty(
        CommonParameters.Common.DIMENSION_EXTRACT_CLASS, SimpleFeatureGeometryExtractor.class);

    propertyManagement.copy(
        CentroidParameters.Centroid.DATA_TYPE_ID, SampleParameters.Sample.DATA_TYPE_ID);

    propertyManagement.copy(CentroidParameters.Centroid.INDEX_ID, SampleParameters.Sample.INDEX_ID);

    ClusteringUtils.createAdapter(propertyManagement);
    ClusteringUtils.createIndex(propertyManagement);

    final String currentBatchId =
        propertyManagement.getPropertyAsString(
            GlobalParameters.Global.BATCH_ID, UUID.randomUUID().toString());

    try {

      final NumericRange rangeOfIterations =
          propertyManagement.getPropertyAsRange(
              JumpParameters.Jump.RANGE_OF_CENTROIDS, new NumericRange(2, 200));
      propertyManagement.store(GlobalParameters.Global.PARENT_BATCH_ID, currentBatchId);

      final GenericStoreCommandLineOptions<DataStore> dataStoreOptions =
          ((PersistableDataStore) propertyManagement.getProperty(StoreParam.DATA_STORE))
              .getCliOptions();
      final GenericStoreCommandLineOptions<IndexStore> indexStoreOptions =
          ((PersistableIndexStore) propertyManagement.getProperty(StoreParam.INDEX_STORE))
              .getCliOptions();
      final GenericStoreCommandLineOptions<AdapterStore> adapterStoreOptions =
          ((PersistableAdapterStore) propertyManagement.getProperty(StoreParam.ADAPTER_STORE))
              .getCliOptions();

      final DistortionGroupManagement distortionGroupManagement =
          new DistortionGroupManagement(
              dataStoreOptions.createStore(),
              indexStoreOptions.createStore(),
              adapterStoreOptions.createStore());

      for (int k = (int) Math.max(2, Math.round(rangeOfIterations.getMin()));
          k < Math.round(rangeOfIterations.getMax());
          k++) {

        // regardless of the algorithm, the sample set is fixed in size
        propertyManagement.store(SampleParameters.Sample.MIN_SAMPLE_SIZE, k);
        propertyManagement.store(SampleParameters.Sample.MAX_SAMPLE_SIZE, k);
        propertyManagement.store(SampleParameters.Sample.SAMPLE_SIZE, k);

        jumpRunner.setCentroidsCount(k);
        jumpRunner.setDataStoreOptions(dataStoreOptions);
        final String iterationBatchId = currentBatchId + "_" + k;
        propertyManagement.store(GlobalParameters.Global.BATCH_ID, iterationBatchId);
        jumpRunner.setReducerCount(k);
        LOGGER.info("KMeans for k: " + k + " and batch " + currentBatchId);
        final int status = super.run(configuration, propertyManagement);
        if (status != 0) {
          return status;
        }
      }
      propertyManagement.store(GlobalParameters.Global.BATCH_ID, currentBatchId);

      @SuppressWarnings("rawtypes")
      final Class<AnalyticItemWrapperFactory> analyticItemWrapperFC =
          propertyManagement.getPropertyAsClass(
              CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS, AnalyticItemWrapperFactory.class);

      /*
       * Associate the batch id with the best set of groups so the caller can find the clusters for
       * the given batch.
       */
      final int result =
          distortionGroupManagement.retainBestGroups(
              (AnalyticItemWrapperFactory<SimpleFeature>) analyticItemWrapperFC.newInstance(),
              propertyManagement.getPropertyAsString(CentroidParameters.Centroid.DATA_TYPE_ID),
              propertyManagement.getPropertyAsString(CentroidParameters.Centroid.INDEX_ID),
              currentBatchId,
              currentZoomLevel);

      // distortionGroupManagement.cleanUp();

      return result;
    } catch (final Exception ex) {
      LOGGER.error("Cannot create distortions", ex);
      return 1;
    }
  }
Example #7
  @Override
  public int run(final Configuration config, final PropertyManagement runTimeProperties)
      throws Exception {

    runTimeProperties.storeIfEmpty(GlobalParameters.Global.BATCH_ID, UUID.randomUUID().toString());

    final FileSystem fs = FileSystem.get(config);

    final String outputBaseDir =
        runTimeProperties.getPropertyAsString(MapReduceParameters.MRConfig.HDFS_BASE_DIR, "/tmp");

    Path startPath =
        new Path(
            outputBaseDir
                + "/"
                + runTimeProperties.getPropertyAsString(GlobalParameters.Global.ACCUMULO_NAMESPACE)
                + "_level_0");
    if (fs.exists(startPath)) {
      fs.delete(startPath, true);
    }

    AbstractPartitioner.putDistances(
        runTimeProperties,
        new double[] {runTimeProperties.getPropertyAsDouble(Partition.PARTITION_DISTANCE, 10)});

    jobRunner.setInputFormatConfiguration(inputFormatConfiguration);
    jobRunner.setOutputFormatConfiguration(new SequenceFileOutputFormatConfiguration(startPath));

    LOGGER.info(
        "Running with partition distance {}",
        runTimeProperties.getPropertyAsDouble(Partition.PARTITION_DISTANCE, 10.0));
    final int initialStatus = jobRunner.run(config, runTimeProperties);

    if (initialStatus != 0) {
      return initialStatus;
    }

    int maxIterationCount =
        runTimeProperties.getPropertyAsInt(ClusteringParameters.Clustering.MAX_ITERATIONS, 15);

    int iteration = 2;
    long lastRecordCount = 0;
    double precisionFactor = 0.9;
    while (maxIterationCount > 0) {

      // context does not matter in this case

      try {
        final Partitioner<?> partitioner =
            runTimeProperties.getClassInstance(
                PartitionParameters.Partition.PARTITIONER_CLASS,
                Partitioner.class,
                OrthodromicDistancePartitioner.class);

        partitioner.initialize(runTimeProperties);
      } catch (final IllegalArgumentException argEx) {
        // this occurs if the partitioner decides that the distance is
        // invalid (e.g. bigger than the map space).
        // In this case, we just exit the loop; startPath has the final data.
        break;
      } catch (final Exception e1) {
        throw new IOException(e1);
      }

      final PropertyManagement localScopeProperties = new PropertyManagement(runTimeProperties);

      localScopeProperties.store(Partition.PARTITION_PRECISION, precisionFactor);
      jobRunner.setInputFormatConfiguration(new SequenceFileInputFormatConfiguration(startPath));

      jobRunner.setFirstIteration(false);

      localScopeProperties.store(HullParameters.Hull.ZOOM_LEVEL, zoomLevel);

      localScopeProperties.store(HullParameters.Hull.ITERATION, iteration);

      localScopeProperties.storeIfEmpty(
          OutputParameters.Output.DATA_TYPE_ID,
          localScopeProperties.getPropertyAsString(
              HullParameters.Hull.DATA_TYPE_ID, "concave_hull"));

      // Set to zero to force each cluster to be moved into the next
      // iteration even if no merge occurs.
      localScopeProperties.store(ClusteringParameters.Clustering.MINIMUM_SIZE, 0);

      final Path nextPath =
          new Path(
              outputBaseDir
                  + "/"
                  + runTimeProperties.getPropertyAsString(
                      GlobalParameters.Global.ACCUMULO_NAMESPACE)
                  + "_level_"
                  + iteration);

      if (fs.exists(nextPath)) {
        fs.delete(nextPath, true);
      }
      jobRunner.setOutputFormatConfiguration(new SequenceFileOutputFormatConfiguration(nextPath));

      final int status = jobRunner.run(config, localScopeProperties);

      if (status != 0) {
        return status;
      }

      final long currentOutputCount = jobRunner.getCounterValue(TaskCounter.REDUCE_OUTPUT_RECORDS);
      if (currentOutputCount == lastRecordCount) {
        maxIterationCount = 0;
      }
      lastRecordCount = currentOutputCount;
      startPath = nextPath;
      maxIterationCount--;
      precisionFactor -= 0.1;
      iteration++;
    }
    final PropertyManagement localScopeProperties = new PropertyManagement(runTimeProperties);

    localScopeProperties.storeIfEmpty(
        OutputParameters.Output.DATA_TYPE_ID,
        localScopeProperties.getPropertyAsString(HullParameters.Hull.DATA_TYPE_ID, "concave_hull"));
    localScopeProperties.storeIfEmpty(
        OutputParameters.Output.DATA_NAMESPACE_URI,
        localScopeProperties.getPropertyAsString(
            HullParameters.Hull.DATA_NAMESPACE_URI, BasicFeatureTypes.DEFAULT_NAMESPACE));
    localScopeProperties.storeIfEmpty(
        OutputParameters.Output.INDEX_ID, localScopeProperties.get(HullParameters.Hull.INDEX_ID));
    inputLoadRunner.setInputFormatConfiguration(
        new SequenceFileInputFormatConfiguration(startPath));
    inputLoadRunner.run(config, runTimeProperties);

    return 0;
  }
Example #8
  @Before
  public void init() {
    jjJobRunner.setMapReduceIntegrater(
        new MapReduceIntegration() {
          @Override
          public int submit(
              final Configuration configuration,
              final PropertyManagement runTimeProperties,
              final GeoWaveAnalyticJobRunner tool)
              throws Exception {
            tool.setConf(configuration);
            FeatureDataAdapterStoreFactory.transferState(configuration, runTimeProperties);
            return tool.run(runTimeProperties.toGeoWaveRunnerArguments());
          }

          @Override
          public Counters waitForCompletion(final Job job)
              throws ClassNotFoundException, IOException, InterruptedException {

            Assert.assertEquals(SequenceFileInputFormat.class, job.getInputFormatClass());
            Assert.assertEquals(10, job.getNumReduceTasks());
            final JobContextConfigurationWrapper configWrapper =
                new JobContextConfigurationWrapper(job);
            Assert.assertEquals("file://foo/bin", job.getConfiguration().get("mapred.input.dir"));

            Assert.assertEquals(
                0.4,
                configWrapper.getDouble(Partition.PARTITION_DISTANCE, NNMapReduce.class, 0.0),
                0.001);

            Assert.assertEquals(
                100, configWrapper.getInt(Partition.MAX_MEMBER_SELECTION, NNMapReduce.class, 1));

            try {
              final Partitioner<?> wrapper =
                  configWrapper.getInstance(
                      Partition.PARTITIONER_CLASS, NNMapReduce.class, Partitioner.class, null);

              Assert.assertEquals(OrthodromicDistancePartitioner.class, wrapper.getClass());

              final Partitioner<?> secondary =
                  configWrapper.getInstance(
                      Partition.SECONDARY_PARTITIONER_CLASS,
                      NNMapReduce.class,
                      Partitioner.class,
                      null);

              Assert.assertEquals(OrthodromicDistancePartitioner.class, secondary.getClass());

              final DistanceFn<?> distanceFn =
                  configWrapper.getInstance(
                      CommonParameters.Common.DISTANCE_FUNCTION_CLASS,
                      NNMapReduce.class,
                      DistanceFn.class,
                      GeometryCentroidDistanceFn.class);

              Assert.assertEquals(FeatureCentroidDistanceFn.class, distanceFn.getClass());

            } catch (final InstantiationException e) {
              throw new IOException("Unable to configure system", e);
            } catch (final IllegalAccessException e) {
              throw new IOException("Unable to configure system", e);
            }

            Assert.assertEquals(10, job.getNumReduceTasks());

            return new Counters();
          }

          @Override
          public Job getJob(final Tool tool) throws IOException {
            return new Job(tool.getConf());
          }
        });

    jjJobRunner.setInputFormatConfiguration(
        new SequenceFileInputFormatConfiguration(new Path("file://foo/bin")));
    jjJobRunner.setReducerCount(10);

    runTimeProperties.store(MRConfig.HDFS_BASE_DIR, "/");

    runTimeProperties.store(GlobalParameters.Global.ZOOKEEKER, "localhost:3000");

    runTimeProperties.store(GlobalParameters.Global.ACCUMULO_INSTANCE, "accumulo");
    runTimeProperties.store(GlobalParameters.Global.ACCUMULO_USER, "root");
    runTimeProperties.store(GlobalParameters.Global.ACCUMULO_PASSWORD, "pwd");
    runTimeProperties.store(GlobalParameters.Global.ACCUMULO_NAMESPACE, "test");

    runTimeProperties.store(
        CommonParameters.Common.DISTANCE_FUNCTION_CLASS, FeatureCentroidDistanceFn.class);

    runTimeProperties.store(Partition.PARTITIONER_CLASS, OrthodromicDistancePartitioner.class);

    runTimeProperties.store(
        Partition.SECONDARY_PARTITIONER_CLASS, OrthodromicDistancePartitioner.class);

    runTimeProperties.store(Partition.PARTITION_DISTANCE, Double.valueOf(0.4));

    runTimeProperties.store(Partition.MAX_MEMBER_SELECTION, Integer.valueOf(100));
  }