/**
 * Prepares the mapper: keeps the task context, obtains the {@code ClassIndex}, reads the dataset
 * header from the path configured under {@code Util.CONF_HEADER_PATH}, optionally loads the hosts
 * that satisfy the minimum document frequency, and loads the training instances.
 *
 * <p>This method never throws. Any exception raised while loading is logged and stored in
 * {@code setupFailedException}; {@code map} checks that field and rethrows it wrapped in an
 * {@code IOException}.
 *
 * @param context the task context; its configuration supplies all settings read here
 */
@Override
  public void setup(Context context) {

    this.context = context;
    classIndex = ClassIndex.getInstance();

    Configuration conf = context.getConfiguration();

    // A threshold of 0 (the default) means no document-frequency filter is loaded.
    float threshold = conf.getFloat(Util.CONF_MINIMUM_DF_OF_HOSTS, 0);

    try {
      Path headerPath = new Path(conf.get(Util.CONF_HEADER_PATH));
      LOG.info("Reading dataset header..." + headerPath.toString());
      header = new DataSetHeader(conf, headerPath);

      // Only initialise the class index if it has not been populated yet.
      if (!classIndex.isPopulated()) {
        classIndex.init(conf);
        classIndex.populateIndex();
      }

      if (threshold > 0.0) {
        LOG.info("loading DF values");
        hostsWithMinimumDF = Util.getHostsWithDocumentFrequencies(conf, threshold);
      }

      LOG.info("loading training data...");
      loadTrainingInstances(conf.get(Util.CONF_TRAINING_DATE));

    } catch (Exception e) {
      // Deliberately not rethrown here: the failure is remembered and surfaced by map().
      // Hand the exception to the logger so the stack trace ends up next to the message
      // instead of on stderr.
      LOG.error("setup failed with an exception!", e);
      setupFailedException = e;
    }
  }
  /**
   * Compares one input instance with every loaded training instance and writes one record per
   * training instance: the key is the input instance's id, the value is {@code array} holding the
   * class index position of the training instance's class label and the Jaccard value of the
   * pair.
   *
   * <p>Also updates the {@code counter}, {@code lastTime} and {@code totalTime} bookkeeping
   * fields, logs timing information every tenth instance, and logs every instance whose
   * processing took longer than {@code REPORT_SLOW_INSTANCE_THRESHOLD} seconds.
   *
   * @param key the input key; not used
   * @param obj wrapper whose payload is cast to {@code Instance}
   * @param context the task context the output records are written to
   * @throws IOException if {@code setup} recorded a failure, or if computing or writing a record
   *     fails (non-I/O failures are wrapped)
   * @throws InterruptedException if writing to the context is interrupted
   */
  @Override
  public void map(LongWritable key, ObjectWritable obj, Context context)
      throws IOException, InterruptedException {

    // setup() stores its failure instead of throwing; surface it on the first record.
    if (setupFailedException != null) {
      throw new IOException(setupFailedException);
    }

    lastTime = System.currentTimeMillis();

    Instance instance = (Instance) obj.get();

    // remove all hosts whose DF is below the threshold
    if (hostsWithMinimumDF != null) {
      instance.setAcceptableIndices(hostsWithMinimumDF.keySet());
    }

    // The two value holders and the array wrapping them are reused for every record; only
    // their contents change between writes.
    DoubleWritable classI = new DoubleWritable();
    DoubleWritable value = new DoubleWritable();
    DoubleWritable[] classAndValue = new DoubleWritable[] {classI, value};

    try {
      // The output key is the same for every training instance, so build it once.
      Text instanceId = new Text(instance.getId());

      // loop through training instances
      for (Instance trainingInstance : trainingInstances) {
        float jaccardValue = jaccardValue(trainingInstance, instance);
        int trainingClassId = classIndex.getIndexPosition(trainingInstance.getClassLabel());

        classI.set(trainingClassId);
        value.set(jaccardValue);
        // store it in an array with the classIndex
        array.set(classAndValue);

        // and hand it to the reducer
        context.write(instanceId, array);
      }
    } catch (Exception e) {
      LOG.error("map failed with exception", e);
      // Both declared exception types propagate unchanged so that an interruption is not
      // disguised as an I/O error; everything else is wrapped.
      if (e instanceof IOException) {
        throw (IOException) e;
      }
      if (e instanceof InterruptedException) {
        throw (InterruptedException) e;
      }
      throw new IOException(e);
    }

    counter++;

    long timeTaken = System.currentTimeMillis() - lastTime;
    totalTime += timeTaken;

    double duration = ((double) timeTaken / 1000);

    if ((counter % 10) == 0) {
      // Cast before dividing so the average is not truncated by integer division.
      double averageDuration = ((double) totalTime / counter) / 1000;

      // print out some performance stuff
      LOG.info(
          "instance "
              + counter
              + " duration: "
              + duration
              + " s - avg : "
              + averageDuration
              + " s"
              + " num_values: "
              + instance.getNumValues());
    }

    if (duration > REPORT_SLOW_INSTANCE_THRESHOLD) {
      LOG.info(
          "Mapped a particularly SLOW INSTANCE. classLabel: "
              + instance.getClassLabel()
              + ", "
              + "duration: "
              + duration
              + " s ("
              + duration / 60
              + " min),"
              + " num_values: "
              + instance.getNumValues());
    }
  }