@Override
 public Label predict(Instance instance) {
   Label l = null;
   if (instance.getLabel() instanceof ClassificationLabel || instance.getLabel() == null) {
     // ----------------- declare variables ------------------
     double lambda = 0.0;
     RealVector x_instance = new ArrayRealVector(matrixX.getColumnDimension(), 0);
     double result = 0.0;
      // ---------------- initialize x_i (FeatureVector is 1-indexed) ----------------
     for (int idx = 0; idx < matrixX.getColumnDimension(); idx++) {
       x_instance.setEntry(idx, instance.getFeatureVector().get(idx + 1));
     }
      // ---------- kernel expansion: lambda = sum_j alpha_j * K(x_j, x) ----------
     for (int j = 0; j < alpha.getDimension(); j++) {
       lambda += alpha.getEntry(j) * kernelFunction(matrixX.getRowVector(j), x_instance);
     }
     // ----------------- make prediction -----------------
      Sigmoid g = new Sigmoid(); // logistic link function (Commons Math)
      result = g.value(lambda);
      l = new ClassificationLabel(result < 0.5 ? 0 : 1); // threshold the posterior at 0.5
   } else {
      System.out.println("Prediction skipped: label is not a ClassificationLabel.");
   }
   return l;
 }
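
// kernelFunction(...) is called in predict(...) but not defined in this snippet.
// A minimal sketch, assuming a Gaussian (RBF) kernel; the bandwidth field
// `gamma` is hypothetical and should match whatever kernel the class really uses.
private double kernelFunction(RealVector a, RealVector b) {
  // K(a, b) = exp(-gamma * ||a - b||^2)
  double distance = a.getDistance(b);
  return Math.exp(-gamma * distance * distance);
}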
/**
 * Trains the logistic-regression weights with stochastic gradient descent.
 *
 * @param instances the training instances; labels must be +1 or -1
 */
 public void train(List<Instance> instances) {
   assert (instances.size() > 0);
   int numOfAttributes = instances.get(0).getNumOfAttributes();
   initializeWeights(numOfAttributes);
   // Stochastic gradient descent training algorithm
   int size = instances.size();
   for (int i = 0; i < numOfIterations; i++) {
      double logit, prob;
      int label, predicted, errorCount = 0;
      for (Instance instance : instances) {
        double[] attributeValues = instance.getAttributes();
        label = instance.getLabel();
        logit = innerProduct(attributeValues);
        assert (label == 1 || label == -1);
        // probability the model assigns to the correct label: sigmoid(y * w.x)
        prob = sigmoid(label * logit);
        // prediction based on the current weights
        predicted = sigmoid(logit) <= 0.5 ? -1 : 1;
        if (predicted != label) errorCount++;
        // stochastic gradient ascent on the log-likelihood:
        // d/dw log sigmoid(y * w.x) = y * x * (1 - sigmoid(y * w.x))
        for (int j = 0; j < numOfAttributes; j++) {
          weights[j] += learningRate * label * attributeValues[j] * (1 - prob);
        }
     }
     if (i % 500 == 0)
       logger.debug("Iteration = {}, training error = {}", i, (double) errorCount / size);
   }
 }
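
// sigmoid(...) and innerProduct(...) are used by train(...) above but defined
// elsewhere; a minimal sketch of plausible implementations over the dense
// double[] weights field.
private double sigmoid(double z) {
  // logistic function 1 / (1 + e^{-z})
  return 1.0 / (1.0 + Math.exp(-z));
}

private double innerProduct(double[] attributeValues) {
  // w . x over the dense weight vector
  double sum = 0.0;
  for (int j = 0; j < weights.length; j++) {
    sum += weights[j] * attributeValues[j];
  }
  return sum;
}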
/**
 * Computes the cost (negative average log-likelihood) to be minimized.
 *
 * @param examples training data with labels in {0, 1}
 * @return the cross-entropy cost over the given examples
 */
public double computeCost(ArrayList<Instance> examples) {
  int m = examples.size();

  double cost = 0.0;
  for (Instance example : examples) {
    double prediction = computeHypothesis(example.getAllInputs());
    // clamp to avoid log(0) when the hypothesis saturates at 0 or 1
    prediction = Math.max(1e-15, Math.min(1.0 - 1e-15, prediction));
    int label = example.getLabel();
    // binary cross-entropy term: y * log(h) + (1 - y) * log(1 - h)
    cost += label * Math.log(prediction) + (1.0 - label) * Math.log(1.0 - prediction);
  }
  return -cost / m;
}
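
// computeHypothesis(...) is used by computeCost(...) and _trainIteration(...)
// but not shown; a minimal sketch, assuming the biasWeight and weights fields
// seen below: h(x) = sigmoid(b + w . x).
private double computeHypothesis(double[] inputs) {
  double z = biasWeight;
  for (int i = 0; i < weights.length; i++) {
    z += weights[i] * inputs[i];
  }
  return 1.0 / (1.0 + Math.exp(-z));
}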
@Override
public void train(List<Instance> instances) {
  // make sgd_iters passes over the dataset
  for (int p = 0; p < sgd_iters; ++p) {
    int n = instances.size();
    for (int i = 0; i < n; ++i) {
      int t = i + p * n + 1; // global step counter, t >= 1

      Instance curInst = instances.get(i); // should be sampled at random in the original Pegasos
      int yi = Integer.parseInt(curInst.getLabel().toString()); // labels are integral here
      if (yi == 0) { // map {0, 1} labels to {-1, +1}
        yi = -1;
      }

      HashMap<Integer, Double> features = curInst.getFeatureVector().getFeatureMap();
      double wx = this.hashMapVectorProduct(this.w, features); // w . x_i, before this step's update

      // decay step: w <- (1 - eta_t * lambda) * w with eta_t = 1 / (lambda * t),
      // which simplifies to (1 - 1/t) * w
      for (Map.Entry<Integer, Double> entry : w.entrySet()) {
        entry.setValue((1.0 - 1.0 / t) * entry.getValue());
      }

      // on a margin violation, add eta_t * y_i * x_ij for every active feature
      if (yi * wx < 1) {
        for (Map.Entry<Integer, Double> entry : features.entrySet()) {
          int j = entry.getKey();
          double xij = entry.getValue();
          if (w.get(j) == null) {
            w.put(j, this.init_w);
          }
          w.put(j, w.get(j) + 1.0 / (pegasos_lambda * t) * yi * xij);
        }
      }
    }
  }
}
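
// hashMapVectorProduct(...) is used by the Pegasos trainer above but not shown;
// a minimal sketch of a sparse dot product between two HashMap<Integer, Double>
// vectors, iterating the instance's feature map and skipping keys missing from w.
private double hashMapVectorProduct(HashMap<Integer, Double> w,
                                    HashMap<Integer, Double> x) {
  double sum = 0.0;
  for (Map.Entry<Integer, Double> entry : x.entrySet()) {
    Double weight = w.get(entry.getKey());
    if (weight != null) {
      sum += weight * entry.getValue();
    }
  }
  return sum;
}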
protected void _trainIteration(ArrayList<Instance> examples, double rate) {
  double biasGradient = 0.0;
  double[] gradients = new double[weights.length];

  // accumulate the batch gradient of the log-likelihood
  for (Instance example : examples) {
    double error = example.getLabel() - computeHypothesis(example.getAllInputs());
    biasGradient += error;
    for (int i = 0; i < gradients.length; i++) {
      gradients[i] += error * example.getInput(i);
    }
  }

  // average over the batch (dividing by the example count exactly once,
  // for bias and weights alike) and take one gradient step
  biasWeight += rate * biasGradient / examples.size();
  for (int i = 0; i < weights.length; i++) {
    weights[i] += rate * gradients[i] / examples.size();
  }
}
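
// A usage sketch for the batch trainer: repeatedly call _trainIteration and
// monitor the cost. The method name, maxIterations, and learningRate are
// hypothetical; the original driver is not part of this snippet.
public void trainBatch(ArrayList<Instance> examples, int maxIterations, double learningRate) {
  for (int iter = 0; iter < maxIterations; iter++) {
    _trainIteration(examples, learningRate);
    if (iter % 100 == 0) {
      System.out.printf("iteration = %d, cost = %.6f%n", iter, computeCost(examples));
    }
  }
}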