/**
   * Builds a supervised machine learning model for the workflow held by this builder's context.
   *
   * <p>The method validates the type of the response variable, splits the pre-processed labeled
   * points into training and testing sets using the workflow's train data fraction and
   * {@code MLConstants.RANDOM_SEED}, copies the workflow/context metadata onto a new
   * {@link MLModel}, delegates to the algorithm-specific builder selected by the workflow's
   * algorithm name, and finally stores the resulting model summary through the database service.
   *
   * @return the {@link MLModel} populated with workflow metadata and handed to the
   *     algorithm-specific builder
   * @throws MLModelBuilderException if the response variable type cannot be determined, if a
   *     categorical response variable is combined with a numerical prediction algorithm, or if
   *     any other error occurs while building the model. Exceptions that are already
   *     {@code MLModelBuilderException}s are propagated as-is; everything else is wrapped.
   */
  public MLModel build() throws MLModelBuilderException {
    MLModelConfigurationContext context = getContext();
    DatabaseService databaseService = MLCoreServiceValueHolder.getInstance().getDatabaseService();
    MLModel mlModel = new MLModel();
    try {
      JavaSparkContext sparkContext = context.getSparkContext();
      Workflow workflow = context.getFacts();
      long modelId = context.getModelId();

      // Verify validity of response variable
      String typeOfResponseVariable =
          getTypeOfResponseVariable(workflow.getResponseVariable(), workflow.getFeatures());

      if (typeOfResponseVariable == null) {
        throw new MLModelBuilderException(
            "Type of response variable cannot be null for supervised learning algorithms.");
      }

      // Stops model building if a categorical attribute is used with numerical prediction
      if (workflow.getAlgorithmClass().equals(AlgorithmType.NUMERICAL_PREDICTION.getValue())
          && typeOfResponseVariable.equals(FeatureType.CATEGORICAL)) {
        throw new MLModelBuilderException(
            "Categorical attribute "
                + workflow.getResponseVariable()
                + " cannot be used as the response variable of the Numerical Prediction algorithm: "
                + workflow.getAlgorithmName());
      }

      // generate train and test datasets by converting tokens to labeled points
      int responseIndex = context.getResponseIndex();
      SortedMap<Integer, String> includedFeatures =
          MLUtils.getIncludedFeaturesAfterReordering(
              workflow, context.getNewToOldIndicesList(), responseIndex);

      // gets the pre-processed dataset
      JavaRDD<LabeledPoint> labeledPoints = preProcess().cache();

      // Fixed seed keeps the train/test split reproducible across runs.
      JavaRDD<LabeledPoint>[] dataSplit =
          labeledPoints.randomSplit(
              new double[] {workflow.getTrainDataFraction(), 1 - workflow.getTrainDataFraction()},
              MLConstants.RANDOM_SEED);

      // remove from cache
      // NOTE(review): randomSplit is a lazy transformation and no action is run on labeledPoints
      // between cache() above and this unpersist(), so the cache is likely never populated and
      // the pre-processing lineage is re-evaluated for each split. Confirm the intent before
      // either dropping the cache() call or deferring this unpersist() until both splits have
      // been materialised.
      labeledPoints.unpersist();

      JavaRDD<LabeledPoint> trainingData = dataSplit[0].cache();
      JavaRDD<LabeledPoint> testingData = dataSplit[1];

      // create a deployable MLModel object
      mlModel.setAlgorithmName(workflow.getAlgorithmName());
      mlModel.setAlgorithmClass(workflow.getAlgorithmClass());
      mlModel.setFeatures(workflow.getIncludedFeatures());
      mlModel.setResponseVariable(workflow.getResponseVariable());
      mlModel.setEncodings(context.getEncodings());
      mlModel.setNewToOldIndicesList(context.getNewToOldIndicesList());
      mlModel.setResponseIndex(responseIndex);

      ModelSummary summaryModel = null;
      Map<Integer, Integer> categoricalFeatureInfo;

      // build a machine learning model according to user selected algorithm
      // Enum.valueOf throws IllegalArgumentException for a name that matches no constant; that
      // case is handled by the generic catch below rather than by the default branch.
      SUPERVISED_ALGORITHM supervisedAlgorithm =
          SUPERVISED_ALGORITHM.valueOf(workflow.getAlgorithmName());
      switch (supervisedAlgorithm) {
        case LOGISTIC_REGRESSION:
          summaryModel =
              buildLogisticRegressionModel(
                  sparkContext,
                  modelId,
                  trainingData,
                  testingData,
                  workflow,
                  mlModel,
                  includedFeatures,
                  true);
          break;
        case LOGISTIC_REGRESSION_LBFGS:
          // Same builder as LOGISTIC_REGRESSION; only the trailing boolean flag differs.
          summaryModel =
              buildLogisticRegressionModel(
                  sparkContext,
                  modelId,
                  trainingData,
                  testingData,
                  workflow,
                  mlModel,
                  includedFeatures,
                  false);
          break;
        case DECISION_TREE:
          categoricalFeatureInfo = getCategoricalFeatureInfo(context.getEncodings());
          summaryModel =
              buildDecisionTreeModel(
                  sparkContext,
                  modelId,
                  trainingData,
                  testingData,
                  workflow,
                  mlModel,
                  includedFeatures,
                  categoricalFeatureInfo);
          break;
        case RANDOM_FOREST:
          categoricalFeatureInfo = getCategoricalFeatureInfo(context.getEncodings());
          summaryModel =
              buildRandomForestTreeModel(
                  sparkContext,
                  modelId,
                  trainingData,
                  testingData,
                  workflow,
                  mlModel,
                  includedFeatures,
                  categoricalFeatureInfo);
          break;
        case SVM:
          summaryModel =
              buildSVMModel(
                  sparkContext,
                  modelId,
                  trainingData,
                  testingData,
                  workflow,
                  mlModel,
                  includedFeatures);
          break;
        case NAIVE_BAYES:
          summaryModel =
              buildNaiveBayesModel(
                  sparkContext,
                  modelId,
                  trainingData,
                  testingData,
                  workflow,
                  mlModel,
                  includedFeatures);
          break;
        case LINEAR_REGRESSION:
          summaryModel =
              buildLinearRegressionModel(
                  sparkContext,
                  modelId,
                  trainingData,
                  testingData,
                  workflow,
                  mlModel,
                  includedFeatures);
          break;
        case RIDGE_REGRESSION:
          summaryModel =
              buildRidgeRegressionModel(
                  sparkContext,
                  modelId,
                  trainingData,
                  testingData,
                  workflow,
                  mlModel,
                  includedFeatures);
          break;
        case LASSO_REGRESSION:
          summaryModel =
              buildLassoRegressionModel(
                  sparkContext,
                  modelId,
                  trainingData,
                  testingData,
                  workflow,
                  mlModel,
                  includedFeatures);
          break;
        default:
          // Reached only for enum constants that have no builder wired in above.
          throw new AlgorithmNameException("Incorrect algorithm name");
      }

      // persist model summary
      databaseService.updateModelSummary(modelId, summaryModel);
      return mlModel;
    } catch (MLModelBuilderException e) {
      // Already the declared exception type with its own specific message; wrapping it again
      // would only stack the generic prefix on top and add a redundant level to the cause chain.
      throw e;
    } catch (Exception e) {
      throw new MLModelBuilderException(
          "An error occurred while building supervised machine learning model: " + e.getMessage(),
          e);
    }
  }