/**
 * Computes the un-normalized kernel-weighted sum of the samples around a single pixel.
 *
 * <p>Every tap of the {@code getWidth() x getWidth()} kernel is multiplied with the sample
 * read through {@code input.get(...)} at the matching offset from (x, y), where offsets are
 * measured relative to {@code kernel.getRadius()}.
 *
 * <p>NOTE(review): the kernel is indexed as {@code kernel.get(row, col)} while the image is
 * indexed as {@code input.get(column, row)}. Whether that matches the parameter order of
 * {@code Kernel2D_I32.get} cannot be determined from here; confirm for non-symmetric kernels.
 *
 * @param kernel square 2D integer kernel
 * @param input sample source; presumably resolves out-of-image coordinates itself (verify)
 * @param x column of the pixel being evaluated
 * @param y row of the pixel being evaluated
 * @return sum of sample * weight over all kernel taps
 */
public static int convolve(Kernel2D_I32 kernel, ImageBorder_S32 input, int x, int y) {
    final int radius = kernel.getRadius();
    final int size = kernel.getWidth();

    int sum = 0;

    for (int row = 0; row < size; row++) {
      // image row sampled for this kernel row
      final int sampleY = y + row - radius;

      for (int col = 0; col < size; col++) {
        final int sampleX = x + col - radius;
        sum += input.get(sampleX, sampleY) * kernel.get(row, col);
      }
    }

    return sum;
  }
  /**
   * Convolves the inner region of {@code src} with a kernel that is read as three rows of
   * three weights ({@code kernel.data[0..8]}, row after row) and stores the divided result
   * in {@code dest}.
   *
   * <p>Only pixels with {@code radius <= x < width - radius} and
   * {@code radius <= y < height - radius} are written, where {@code radius} is
   * {@code kernel.getRadius()}; all other pixels of {@code dest} are left untouched.
   * For each written pixel the weighted sum has {@code divisor / 2} added to it before the
   * integer division by {@code divisor}.
   *
   * <p>NOTE(review): exactly three taps per row are always used, so this presumably expects a
   * 3x3 kernel (radius 1) — confirm against callers.
   *
   * @param kernel kernel whose first nine weights are applied
   * @param src image being read
   * @param dest image receiving the result
   * @param divisor value every weighted sum is divided by
   */
  public static void convolve3(Kernel2D_I32 kernel, ImageSInt16 src, ImageInt16 dest, int divisor) {
    final short[] in = src.data;
    final short[] out = dest.data;

    final int cols = src.getWidth();
    final int rows = src.getHeight();
    final int roundingTerm = divisor / 2;

    final int radius = kernel.getRadius();
    // per-column accumulator, reused for every output row
    final int[] rowSums = new int[cols];

    final int xEnd = cols - radius;
    final int yEnd = rows - radius;

    for (int y = radius; y < yEnd; y++) {

      // accumulate the three kernel rows one source row at a time
      for (int kernelRow = 0; kernelRow < 3; kernelRow++) {
        final int c0 = kernel.data[kernelRow * 3];
        final int c1 = kernel.data[kernelRow * 3 + 1];
        final int c2 = kernel.data[kernelRow * 3 + 2];

        // index of the source element `radius` columns left of column 0 in the sampled row
        final int rowStart = src.startIndex + (y + kernelRow - radius) * src.stride - radius;

        for (int x = radius; x < xEnd; x++) {
          final int p = rowStart + x;
          final int partial = in[p] * c0 + in[p + 1] * c1 + in[p + 2] * c2;

          // the first kernel row overwrites the stale value, the remaining rows add to it
          rowSums[x] = (kernelRow == 0) ? partial : rowSums[x] + partial;
        }
      }

      // divide and store the finished row
      final int dstRow = dest.startIndex + y * dest.stride;
      for (int x = radius; x < xEnd; x++) {
        out[dstRow + x] = (short) ((rowSums[x] + roundingTerm) / divisor);
      }
    }
  }
  /**
   * Normalized convolution restricted to the image border.
   *
   * <p>For every border pixel only the kernel taps that land inside the image are used. The
   * stored value is {@code (sum + weight / 2) / weight}, where {@code sum} is the weighted sum
   * of the covered samples and {@code weight} is the sum of the kernel weights that took part.
   * Pixels outside the border band (columns {@code offset .. width - offsetR - 1} combined with
   * rows {@code offset .. height - offsetR - 1}) are not written.
   *
   * <p>The work is done in three passes: the left/right edge columns of every row (which covers
   * the corners), then the top rows, then the bottom rows, the latter two skipping the columns
   * already handled by the first pass.
   *
   * <p>A weight sum of zero leads to an integer division by zero.
   * NOTE(review): the column range of the left/right pass and the row range of the top/bottom
   * passes are clipped on one side only, so the image presumably has to be at least as large
   * as the kernel — confirm against callers.
   *
   * @param kernel 2D integer kernel; {@code getOffset()} gives the taps left of/above the center
   * @param input image being read
   * @param output image whose border pixels are overwritten
   */
  public static void convolve(Kernel2D_I32 kernel, GrayS16 input, GrayI16 output) {
    final short[] src = input.data;
    final short[] dst = output.data;
    final int[] ker = kernel.data;

    final int kerWidth = kernel.getWidth();
    final int left = kernel.getOffset();
    final int right = kerWidth - left - 1;

    final int cols = input.getWidth();
    final int rows = input.getHeight();

    // Pass 1: left and right edge columns of every row, corners included.
    for (int y = 0; y < rows; y++) {

      // clip the kernel's vertical extent to the image
      final int rowLo;
      if (y >= left) {
        rowLo = -left;
      } else {
        rowLo = -y;
      }
      final int rowHi;
      if (y < rows - right) {
        rowHi = right;
      } else {
        rowHi = rows - y - 1;
      }

      final int srcRow = input.startIndex + y * input.stride;
      final int dstRow = output.startIndex + y * output.stride;

      // left edge: the kernel is cut off on its left side
      for (int x = 0; x < left; x++) {
        dst[dstRow + x] = normalizedBorderSample(src, ker, kerWidth, left,
            srcRow + x, input.stride, rowLo, rowHi, -x, right);
      }

      // right edge: the kernel is cut off on its right side
      for (int x = cols - right; x < cols; x++) {
        dst[dstRow + x] = normalizedBorderSample(src, ker, kerWidth, left,
            srcRow + x, input.stride, rowLo, rowHi, -left, cols - x - 1);
      }
    }

    // Pass 2: top rows, skipping the columns already handled by pass 1.
    for (int y = 0; y < left; y++) {
      final int srcRow = input.startIndex + y * input.stride;
      final int dstRow = output.startIndex + y * output.stride;

      for (int x = left; x < cols - right; x++) {
        dst[dstRow + x] = normalizedBorderSample(src, ker, kerWidth, left,
            srcRow + x, input.stride, -y, right, -left, right);
      }
    }

    // Pass 3: bottom rows, skipping the columns already handled by pass 1.
    for (int y = rows - right; y < rows; y++) {
      final int lastRowOffset = rows - y - 1;
      final int srcRow = input.startIndex + y * input.stride;
      final int dstRow = output.startIndex + y * output.stride;

      for (int x = left; x < cols - right; x++) {
        dst[dstRow + x] = normalizedBorderSample(src, ker, kerWidth, left,
            srcRow + x, input.stride, -left, lastRowOffset, -left, right);
      }
    }
  }

  /**
   * Weighted, weight-normalized sample for one pixel using only the kernel taps whose offsets
   * lie in {@code [rowLo, rowHi] x [colLo, colHi]} relative to the pixel.
   *
   * @param src source pixel array
   * @param ker kernel weights, row after row
   * @param kerWidth number of weights per kernel row
   * @param kerOffset offset of the kernel's center tap within a row/column
   * @param center index in {@code src} of the pixel being evaluated
   * @param stride distance in {@code src} between vertically adjacent pixels
   * @param rowLo first row offset (inclusive)
   * @param rowHi last row offset (inclusive)
   * @param colLo first column offset (inclusive)
   * @param colHi last column offset (inclusive)
   * @return {@code (sum + weight / 2) / weight} narrowed to {@code short}
   */
  private static short normalizedBorderSample(short[] src, int[] ker, int kerWidth, int kerOffset,
                                              int center, int stride,
                                              int rowLo, int rowHi, int colLo, int colHi) {
    int sum = 0;
    int weightSum = 0;

    for (int dy = rowLo; dy <= rowHi; dy++) {
      final int srcBase = center + dy * stride;
      final int kerBase = (dy + kerOffset) * kerWidth + kerOffset;

      for (int dx = colLo; dx <= colHi; dx++) {
        final int w = ker[kerBase + dx];
        weightSum += w;
        sum += src[srcBase + dx] * w;
      }
    }

    return (short) ((sum + weightSum / 2) / weightSum);
  }