/**
   * Builds a logistic regression model, evaluates it against the testing data and returns a
   * summary of the result.
   *
   * <p>The model is trained with SGD when {@code isSGD} is {@code true}, otherwise with L-BFGS.
   * Side effects: {@code trainingData} is unpersisted once training has finished;
   * {@code testingData} is cached for the duration of the evaluation and is always unpersisted
   * afterwards, even when the evaluation fails; the trained model, with its threshold cleared,
   * is stored in {@code mlModel}.
   *
   * @param sparkContext JavaSparkContext initialized with the application
   * @param modelID Model ID (not referenced by this method)
   * @param trainingData Training data as a JavaRDD of LabeledPoints
   * @param testingData Testing data as a JavaRDD of LabeledPoints
   * @param workflow Machine learning workflow; supplies the hyper parameters and dataset version
   * @param mlModel Deployable machine learning model; receives the trained model
   * @param includedFeatures Features included in the model; the map values are reported as the
   *     feature names and the map is paired with the model weights to derive feature importance
   * @param isSGD Whether the algorithm is Logistic regression with SGD
   * @return Summary of the built model: features, feature importance, algorithm name, confusion
   *     matrix, accuracy and dataset version
   * @throws MLModelBuilderException if SGD is requested for a response variable with more than
   *     two classes, if the generated weights are invalid, or if any other error occurs while
   *     building or evaluating the model (every failure is wrapped with a common message prefix)
   */
  private ModelSummary buildLogisticRegressionModel(
      JavaSparkContext sparkContext,
      long modelID,
      JavaRDD<LabeledPoint> trainingData,
      JavaRDD<LabeledPoint> testingData,
      Workflow workflow,
      MLModel mlModel,
      SortedMap<Integer, String> includedFeatures,
      boolean isSGD)
      throws MLModelBuilderException {
    try {
      LogisticRegression logisticRegression = new LogisticRegression();
      Map<String, String> hyperParameters = workflow.getHyperParameters();
      LogisticRegressionModel logisticRegressionModel;
      String algorithmName;

      int noOfClasses = getNoOfClasses(mlModel);

      if (isSGD) {
        algorithmName = SUPERVISED_ALGORITHM.LOGISTIC_REGRESSION.toString();

        // The SGD variant is binary only; reject multiclass problems before training.
        if (noOfClasses > 2) {
          throw new MLModelBuilderException(
              "A binary classification algorithm cannot have more than "
                  + "two distinct values in response variable.");
        }

        logisticRegressionModel =
            logisticRegression.trainWithSGD(
                trainingData,
                Double.parseDouble(hyperParameters.get(MLConstants.LEARNING_RATE)),
                Integer.parseInt(hyperParameters.get(MLConstants.ITERATIONS)),
                hyperParameters.get(MLConstants.REGULARIZATION_TYPE),
                Double.parseDouble(hyperParameters.get(MLConstants.REGULARIZATION_PARAMETER)),
                Double.parseDouble(hyperParameters.get(MLConstants.SGD_DATA_FRACTION)));
      } else {
        algorithmName = SUPERVISED_ALGORITHM.LOGISTIC_REGRESSION_LBFGS.toString();
        logisticRegressionModel =
            logisticRegression.trainWithLBFGS(
                trainingData, hyperParameters.get(MLConstants.REGULARIZATION_TYPE), noOfClasses);
      }

      // remove from cache
      trainingData.unpersist();

      // Validate the weights before the test data is cached, so that a bad model fails fast
      // without leaving anything behind in the cache.
      Vector weights = logisticRegressionModel.weights();
      if (!isValidWeights(weights)) {
        throw new MLModelBuilderException(
            "Weights of the model generated are null or infinity. [Weights] "
                + vectorToString(weights));
      }

      MulticlassMetrics multiclassMetrics;
      MulticlassConfusionMatrix multiclassConfusionMatrix;
      ProbabilisticClassificationModelSummary probabilisticClassificationModelSummary;

      // add test data to cache
      testingData.cache();
      try {
        // getting scores and labels without clearing threshold to get confusion matrix
        JavaRDD<Tuple2<Object, Object>> scoresAndLabelsThresholded =
            logisticRegression.test(logisticRegressionModel, testingData);
        multiclassMetrics = new MulticlassMetrics(JavaRDD.toRDD(scoresAndLabelsThresholded));
        multiclassConfusionMatrix = getMulticlassConfusionMatrix(multiclassMetrics, mlModel);

        // clearing the threshold value to get a probability as the output of the prediction
        logisticRegressionModel.clearThreshold();
        JavaRDD<Tuple2<Object, Object>> scoresAndLabels =
            logisticRegression.test(logisticRegressionModel, testingData);
        probabilisticClassificationModelSummary =
            SparkModelUtils.generateProbabilisticClassificationModelSummary(
                sparkContext, testingData, scoresAndLabels);
        mlModel.setModel(new MLClassificationModel(logisticRegressionModel));
      } finally {
        // remove from cache, also when the evaluation above fails
        testingData.unpersist();
      }

      List<FeatureImportance> featureWeights =
          getFeatureWeights(includedFeatures, weights.toArray());
      probabilisticClassificationModelSummary.setFeatures(
          includedFeatures.values().toArray(new String[0]));
      probabilisticClassificationModelSummary.setFeatureImportance(featureWeights);
      probabilisticClassificationModelSummary.setAlgorithm(algorithmName);

      probabilisticClassificationModelSummary.setMulticlassConfusionMatrix(
          multiclassConfusionMatrix);
      Double modelAccuracy = getModelAccuracy(multiclassMetrics);
      probabilisticClassificationModelSummary.setModelAccuracy(modelAccuracy);
      probabilisticClassificationModelSummary.setDatasetVersion(workflow.getDatasetVersion());

      return probabilisticClassificationModelSummary;
    } catch (Exception e) {
      // Boundary catch: every failure, including the validation errors raised above, is
      // reported to the caller as an MLModelBuilderException that keeps the original cause.
      throw new MLModelBuilderException(
          "An error occurred while building logistic regression model: " + e.getMessage(), e);
    }
  }