@Override
    protected void compute() {

      if (aEnd - aStart > minSize && bEnd - bStart > minSize) {
        final int aMiddle = aStart + (aEnd - aStart) / 2;
        final int bMiddle = bStart + (bEnd - bStart) / 2;

        invokeAll(
            new MulitplyPartly(a, b, result, aStart, aMiddle, bStart, bMiddle),
            new MulitplyPartly(a, b, result, aMiddle, aEnd, bMiddle, bEnd),
            new MulitplyPartly(a, b, result, aStart, aMiddle, bMiddle, bEnd),
            new MulitplyPartly(a, b, result, aMiddle, aEnd, bStart, bMiddle));
      } else {
        FloatMatrix x =
            a.get(new IntervalRange(aStart, aEnd), new IntervalRange(0, a.columns))
                .mmul(b.get(new IntervalRange(0, b.rows), new IntervalRange(bStart, bEnd)));
        result.put(x, aStart, bStart);
      }
    }
 private FloatMatrix computeFloatTensorDeltaDown(
     FloatMatrix deltaFull,
     FloatMatrix leftVector,
     FloatMatrix rightVector,
     FloatMatrix W,
     FloatTensor Wt) {
   FloatMatrix WTDelta = W.transpose().mmul(deltaFull);
   FloatMatrix WTDeltaNoBias = WTDelta.get(interval(0, 1), interval(0, deltaFull.rows * 2));
   int size = deltaFull.length;
   FloatMatrix deltaFloatTensor = new FloatMatrix(size * 2, 1);
   FloatMatrix fullVector = FloatMatrix.concatHorizontally(leftVector, rightVector);
   for (int slice = 0; slice < size; ++slice) {
     FloatMatrix scaledFullVector = SimpleBlas.scal(deltaFull.get(slice), fullVector);
     deltaFloatTensor =
         deltaFloatTensor.add(
             Wt.getSlice(slice).add(Wt.getSlice(slice).transpose()).mmul(scaledFullVector));
   }
   return deltaFloatTensor.add(WTDeltaNoBias);
 }
 private FloatTensor getFloatTensorGradient(
     FloatMatrix deltaFull, FloatMatrix leftVector, FloatMatrix rightVector) {
   int size = deltaFull.length;
   FloatTensor Wt_df = new FloatTensor(size * 2, size * 2, size);
   FloatMatrix fullVector = FloatMatrix.concatHorizontally(leftVector, rightVector);
   for (int slice = 0; slice < size; ++slice) {
     Wt_df.setSlice(
         slice, SimpleBlas.scal(deltaFull.get(slice), fullVector).mmul(fullVector.transpose()));
   }
   return Wt_df;
 }
  public FloatMatrix normalizeRatings() {

    int[] indices;
    FloatMatrix yMean = FloatMatrix.zeros(rows, 1);
    FloatMatrix yNorm = FloatMatrix.zeros(rows, columns);

    for (int i = 0; i < rows; i++) {
      indices = r.getRow(i).eq(1).findIndices();
      yMean.put(i, y.getRow(i).get(indices).mean());
      yNorm.getRow(i).put(indices, y.getRow(i).get(indices).sub(yMean.get(i)));
    }

    return yMean;
  }
  /**
   * Given a sequence of Iterators over a set of matrices, fill in all of the matrices with the
   * entries in the theta vector. Errors are thrown if the theta vector does not exactly fill the
   * matrices.
   */
  public void setParams(FloatMatrix theta, Iterator<? extends FloatMatrix>... matrices) {
    int index = 0;
    for (Iterator<? extends FloatMatrix> matrixIterator : matrices) {
      while (matrixIterator.hasNext()) {
        FloatMatrix matrix = matrixIterator.next();
        for (int i = 0; i < matrix.length; ++i) {
          matrix.put(i, theta.get(index));
          ++index;
        }
      }
    }

    if (index != theta.length) {
      throw new AssertionError("Did not entirely use the theta vector");
    }
  }
  public static FloatMatrix conv2d(FloatMatrix input, FloatMatrix kernel, Type type) {

    FloatMatrix xShape = new FloatMatrix(1, 2);
    xShape.put(0, input.rows);
    xShape.put(1, input.columns);

    FloatMatrix yShape = new FloatMatrix(1, 2);
    yShape.put(0, kernel.rows);
    yShape.put(1, kernel.columns);

    FloatMatrix zShape = xShape.add(yShape).sub(1);
    int retRows = (int) zShape.get(0);
    int retCols = (int) zShape.get(1);

    ComplexFloatMatrix fftInput = complexDisceteFourierTransform(input, retRows, retCols);
    ComplexFloatMatrix fftKernel = complexDisceteFourierTransform(kernel, retRows, retCols);
    ComplexFloatMatrix mul = fftKernel.mul(fftInput);
    ComplexFloatMatrix retComplex = complexInverseDisceteFourierTransform(mul);

    FloatMatrix ret = retComplex.getReal();

    if (type == Type.VALID) {

      FloatMatrix validShape = xShape.subi(yShape).add(1);

      FloatMatrix start = zShape.sub(validShape).div(2);
      FloatMatrix end = start.add(validShape);
      if (start.get(0) < 1 || start.get(1) < 1)
        throw new IllegalStateException("Illegal row index " + start);
      if (end.get(0) < 1 || end.get(1) < 1)
        throw new IllegalStateException("Illegal column index " + end);

      ret =
          ret.get(
              RangeUtils.interval((int) start.get(0), (int) end.get(0)),
              RangeUtils.interval((int) start.get(1), (int) end.get(1)));
    }

    return ret;
  }
  private void backpropDerivativesAndError(
      Tree tree,
      MultiDimensionalMap<String, String, FloatMatrix> binaryTD,
      MultiDimensionalMap<String, String, FloatMatrix> binaryCD,
      MultiDimensionalMap<String, String, FloatTensor> binaryFloatTensorTD,
      Map<String, FloatMatrix> unaryCD,
      Map<String, FloatMatrix> wordVectorD,
      FloatMatrix deltaUp) {
    if (tree.isLeaf()) {
      return;
    }

    FloatMatrix currentVector = tree.vector();
    String category = tree.label();
    category = basicCategory(category);

    // Build a vector that looks like 0,0,1,0,0 with an indicator for the correct class
    FloatMatrix goldLabel = new FloatMatrix(numOuts, 1);
    int goldClass = tree.goldLabel();
    if (goldClass >= 0) {
      goldLabel.put(goldClass, 1.0f);
    }

    Float nodeWeight = classWeights.get(goldClass);
    if (nodeWeight == null) nodeWeight = 1.0f;
    FloatMatrix predictions = tree.prediction();

    // If this is an unlabeled class, set deltaClass to 0.  We could
    // make this more efficient by eliminating various of the below
    // calculations, but this would be the easiest way to handle the
    // unlabeled class
    FloatMatrix deltaClass =
        goldClass >= 0
            ? SimpleBlas.scal(nodeWeight, predictions.sub(goldLabel))
            : new FloatMatrix(predictions.rows, predictions.columns);
    FloatMatrix localCD = deltaClass.mmul(appendBias(currentVector).transpose());

    float error = -(MatrixFunctions.log(predictions).muli(goldLabel).sum());
    error = error * nodeWeight;
    tree.setError(error);

    if (tree.isPreTerminal()) { // below us is a word vector
      unaryCD.put(category, unaryCD.get(category).add(localCD));

      String word = tree.children().get(0).label();
      word = getVocabWord(word);

      FloatMatrix currentVectorDerivative = activationFunction.apply(currentVector);
      FloatMatrix deltaFromClass = getUnaryClassification(category).transpose().mmul(deltaClass);
      deltaFromClass =
          deltaFromClass.get(interval(0, numHidden), interval(0, 1)).mul(currentVectorDerivative);
      FloatMatrix deltaFull = deltaFromClass.add(deltaUp);
      wordVectorD.put(word, wordVectorD.get(word).add(deltaFull));

    } else {
      // Otherwise, this must be a binary node
      String leftCategory = basicCategory(tree.children().get(0).label());
      String rightCategory = basicCategory(tree.children().get(1).label());
      if (combineClassification) {
        unaryCD.put("", unaryCD.get("").add(localCD));
      } else {
        binaryCD.put(
            leftCategory, rightCategory, binaryCD.get(leftCategory, rightCategory).add(localCD));
      }

      FloatMatrix currentVectorDerivative = activationFunction.applyDerivative(currentVector);
      FloatMatrix deltaFromClass =
          getBinaryClassification(leftCategory, rightCategory).transpose().mmul(deltaClass);

      FloatMatrix mult = deltaFromClass.get(interval(0, numHidden), interval(0, 1));
      deltaFromClass = mult.muli(currentVectorDerivative);
      FloatMatrix deltaFull = deltaFromClass.add(deltaUp);

      FloatMatrix leftVector = tree.children().get(0).vector();
      FloatMatrix rightVector = tree.children().get(1).vector();

      FloatMatrix childrenVector = appendBias(leftVector, rightVector);

      // deltaFull 50 x 1, childrenVector: 50 x 2
      FloatMatrix add = binaryTD.get(leftCategory, rightCategory);

      FloatMatrix W_df = deltaFromClass.mmul(childrenVector.transpose());
      binaryTD.put(leftCategory, rightCategory, add.add(W_df));

      FloatMatrix deltaDown;
      if (useFloatTensors) {
        FloatTensor Wt_df = getFloatTensorGradient(deltaFull, leftVector, rightVector);
        binaryFloatTensorTD.put(
            leftCategory,
            rightCategory,
            binaryFloatTensorTD.get(leftCategory, rightCategory).add(Wt_df));
        deltaDown =
            computeFloatTensorDeltaDown(
                deltaFull,
                leftVector,
                rightVector,
                getBinaryTransform(leftCategory, rightCategory),
                getBinaryFloatTensor(leftCategory, rightCategory));
      } else {
        deltaDown = getBinaryTransform(leftCategory, rightCategory).transpose().mmul(deltaFull);
      }

      FloatMatrix leftDerivative = activationFunction.apply(leftVector);
      FloatMatrix rightDerivative = activationFunction.apply(rightVector);
      FloatMatrix leftDeltaDown = deltaDown.get(interval(0, deltaFull.rows), interval(0, 1));
      FloatMatrix rightDeltaDown =
          deltaDown.get(interval(deltaFull.rows, deltaFull.rows * 2), interval(0, 1));
      backpropDerivativesAndError(
          tree.children().get(0),
          binaryTD,
          binaryCD,
          binaryFloatTensorTD,
          unaryCD,
          wordVectorD,
          leftDerivative.mul(leftDeltaDown));
      backpropDerivativesAndError(
          tree.children().get(1),
          binaryTD,
          binaryCD,
          binaryFloatTensorTD,
          unaryCD,
          wordVectorD,
          rightDerivative.mul(rightDeltaDown));
    }
  }