Exemplo n.º 1
0
  /**
   * Trains the most-similar interpolator on {@code simList} and then fits a multiple linear
   * regression that predicts the known similarity from interpolated per-metric features.
   *
   * <p>The interpolator is trained on the full list. For the regression, entries that are {@code
   * null} or that report no metric with a score are skipped. Every remaining example contributes
   * one row holding two features per metric: the interpolated score and {@code log(rank + 1)}. The
   * estimated parameters are stored in {@code mostSimilarCoefficients}, and the square root of the
   * regression's R-squared is logged.
   *
   * @param simList training examples
   * @throws IllegalStateException if {@code simList} is empty
   */
  @Override
  public void trainMostSimilar(List<EnsembleSim> simList) {
    if (simList.isEmpty()) {
      throw new IllegalStateException("no examples to train on!");
    }
    mostSimilarInterpolator.trainMostSimilar(simList);

    // Keep only the examples for which at least one metric produced a score.
    List<EnsembleSim> usable = new ArrayList<EnsembleSim>();
    for (EnsembleSim candidate : simList) {
      boolean hasObservedMetric = candidate != null && candidate.getNumMetricsWithScore() > 0;
      if (hasObservedMetric) {
        usable.add(candidate);
      }
    }

    int rowCount = usable.size();
    double[][] features = new double[rowCount][numMetrics * 2];
    double[] targets = new double[rowCount];
    for (int row = 0; row < rowCount; row++) {
      EnsembleSim raw = usable.get(row);
      targets[row] = raw.knownSim.similarity;

      // Missing values are filled in by the interpolator before the row is built.
      EnsembleSim filled = mostSimilarInterpolator.interpolate(raw);
      double[] featureRow = features[row];
      for (int metric = 0, col = 0; metric < numMetrics; metric++, col += 2) {
        featureRow[col] = filled.getScores().get(metric);
        featureRow[col + 1] = Math.log(filled.getRanks().get(metric) + 1);
      }
    }

    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(targets, features);

    mostSimilarCoefficients = new TDoubleArrayList(ols.estimateRegressionParameters());
    double pearson = Math.sqrt(ols.calculateRSquared());
    LOG.info("coefficients are " + mostSimilarCoefficients);
    LOG.info("pearson for multiple regression is " + pearson);
  }
Exemplo n.º 2
0
  /**
   * Samples {@code curve} at evenly spaced positions starting at {@code min} and stores the
   * interpolated values in an array.
   *
   * <p>Sample {@code i} is taken at {@code min + step * i}. When {@code result} is {@code null} a
   * new array of length {@code (int) ((max - min) / step)} is allocated; otherwise the supplied
   * array is filled over its whole length and {@code max} is not consulted.
   *
   * @param min position of the first sample
   * @param max upper bound used only to size a newly allocated array
   * @param step distance between consecutive samples
   * @param curve curve data handed unchanged to the {@code Interpolator}
   * @param result array to fill, or {@code null} to allocate one
   * @return the filled array ({@code result} itself when it was not {@code null})
   */
  public static double[] interpolate(
      double min, double max, double step, double curve[][], double result[]) {
    Interpolator engine = new Interpolator();

    double[] samples = result;
    if (samples == null) {
      int count = (int) ((max - min) / step);
      samples = new double[count];
    }

    int index = 0;
    while (index < samples.length) {
      samples[index] = engine.interpolate(min + step * index, curve);
      index++;
    }

    return samples;
  }
Exemplo n.º 3
0
  /**
   * Samples {@code curve} at evenly spaced positions starting at {@code min} and stores the
   * values as 16-bit quantities.
   *
   * <p>Sample {@code i} is taken at {@code min + step * i}. Each interpolated value is multiplied
   * by {@code 0xffff}, offset by {@code 0.5}, truncated to {@code int}, masked to the low 16 bits
   * and stored as a {@code short}. When {@code result} is {@code null} a new array of length
   * {@code (int) ((max - min) / step)} is allocated; otherwise the supplied array is filled over
   * its whole length and {@code max} is not consulted.
   *
   * @param min position of the first sample
   * @param max upper bound used only to size a newly allocated array
   * @param step distance between consecutive samples
   * @param curve curve data handed unchanged to the {@code Interpolator}
   * @param result array to fill, or {@code null} to allocate one
   * @return the filled array ({@code result} itself when it was not {@code null})
   */
  public static short[] interpolate(
      double min, double max, double step, double curve[][], short result[]) {
    Interpolator engine = new Interpolator();

    short[] samples = result;
    if (samples == null) {
      int count = (int) ((max - min) / step);
      samples = new short[count];
    }

    int index = 0;
    while (index < samples.length) {
      // Scale to the 16-bit range; the +0.5 rounds before the truncating int cast.
      int scaled = (int) (engine.interpolate(min + step * index, curve) * 0xffff + 0.5);
      samples[index] = (short) (scaled & 0xffff);
      index++;
    }

    return samples;
  }
Exemplo n.º 4
0
  /**
   * Trains the similarity interpolator on {@code simList} and then fits a multiple linear
   * regression that predicts the known similarity from the interpolated per-metric scores.
   *
   * <p>Every example contributes one row with one feature per metric. The estimated parameters
   * are stored in {@code simlarityCoefficients}, and the square root of the regression's R-squared
   * is logged.
   *
   * @param simList training examples
   * @throws IllegalArgumentException if {@code simList} is empty
   */
  @Override
  public void trainSimilarity(List<EnsembleSim> simList) {
    if (simList.isEmpty()) {
      throw new IllegalArgumentException("no examples to train on!");
    }
    similarityInterpolator.trainSimilarity(simList);

    int rowCount = simList.size();
    double[][] features = new double[rowCount][numMetrics];
    double[] targets = new double[rowCount];
    for (int row = 0; row < rowCount; row++) {
      EnsembleSim raw = simList.get(row);
      targets[row] = raw.knownSim.similarity;

      // Missing values are filled in by the interpolator before the row is built.
      EnsembleSim filled = similarityInterpolator.interpolate(raw);
      double[] featureRow = features[row];
      for (int metric = 0; metric < numMetrics; metric++) {
        featureRow[metric] = filled.getScores().get(metric);
      }
    }

    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(targets, features);

    simlarityCoefficients = new TDoubleArrayList(ols.estimateRegressionParameters());
    double pearson = Math.sqrt(ols.calculateRSquared());
    LOG.info("coefficients are " + simlarityCoefficients);
    LOG.info("pearson for multiple regression is " + pearson);
  }
  /**
   * Returns the next character in the filtered stream, replacing tokens from the original stream.
   *
   * <p>Each call serves one character. Pending text in {@code replaceData} (from {@code
   * replaceIndex}) is drained first. Otherwise a raw character is taken, either replayed from
   * {@code endToken} (while {@code previousIndex} is active) or read from the underlying reader
   * {@code in}. When that character equals the first character of {@code beginToken}, the method
   * reads ahead and collects a candidate expression that must match {@code beginToken}, run up to
   * the first character of {@code endToken}, and then match the rest of {@code endToken}. A
   * complete expression is passed to the interpolator and its result is queued for output; an
   * incomplete one (mismatch or end of stream) is queued unchanged.
   *
   * @return the next character in the resulting stream, or -1 if the end of the resulting stream
   *     has been reached
   * @exception IOException if the underlying stream throws an IOException during reading
   * @throws IllegalArgumentException if the interpolator throws an {@code InterpolationException};
   *     the original exception is attached as the cause
   */
  public int read() throws IOException {
    // Drain queued replacement text first, one character per call.
    if (replaceIndex != -1 && replaceIndex < replaceData.length()) {
      int ch = replaceData.charAt(replaceIndex++);
      if (replaceIndex >= replaceData.length()) {
        // Queue exhausted: -1 marks "nothing pending".
        replaceIndex = -1;
      }
      return ch;
    }

    // Next raw character: replay endToken while previousIndex is active, otherwise read from in.
    int ch = -1;
    if (previousIndex != -1 && previousIndex < this.endToken.length()) {
      ch = this.endToken.charAt(previousIndex++);
    } else {
      ch = in.read();
    }

    // A character equal to the start of beginToken may open an expression; read ahead to find out.
    if (ch == this.beginToken.charAt(0)) {
      // key accumulates every character consumed during look-ahead, delimiters included.
      StringBuffer key = new StringBuffer();

      key.append((char) ch);

      // Index of the next beginToken character that still has to be matched.
      int beginTokenMatchPos = 1;

      // Consume the rest of beginToken and the expression body, up to the first endToken char.
      do {
        if (previousIndex != -1 && previousIndex < this.endToken.length()) {
          ch = this.endToken.charAt(previousIndex++);
        } else {
          ch = in.read();
        }
        if (ch != -1) {
          key.append((char) ch);

          // While still inside beginToken every character must match it; the position only
          // advances when the first operand is true (short-circuit evaluation).
          if ((beginTokenMatchPos < this.beginToken.length())
              && (ch != this.beginToken.charAt(beginTokenMatchPos++))) {
            ch = -1; // not really EOF but to trigger code below
            break;
          }
        } else {
          // End of input before the expression was closed.
          break;
        }
      } while (ch != this.endToken.charAt(0));

      // now test endToken
      // Only needed for multi-character end tokens; the first character was matched above.
      if (ch != -1 && this.endToken.length() > 1) {
        int endTokenMatchPos = 1;

        do {
          if (previousIndex != -1 && previousIndex < this.endToken.length()) {
            ch = this.endToken.charAt(previousIndex++);
          } else {
            ch = in.read();
          }

          if (ch != -1) {
            key.append((char) ch);

            if (ch != this.endToken.charAt(endTokenMatchPos++)) {
              ch = -1; // not really EOF but to trigger code below
              break;
            }

          } else {
            break;
          }
        } while (endTokenMatchPos < this.endToken.length());
      }

      // There is nothing left to read so we have the situation where the begin/end token
      // are in fact the same and as there is nothing left to read we have got ourselves
      // end of a token boundary so let it pass through.
      // This branch is also taken when a begin/end token mismatch forced ch to -1 above. The
      // consumed text is emitted unchanged: its first character is returned now and the rest is
      // queued by starting replaceIndex at 1.
      if (ch == -1) {
        replaceData = key.toString();
        replaceIndex = 1;
        return replaceData.charAt(0);
      }

      // A complete expression was collected; key still contains both delimiters.
      String value;
      try {
        value = interpolator.interpolate(key.toString(), "");
      } catch (InterpolationException e) {
        // Rethrow as an unchecked exception, keeping the message and the original cause.
        IllegalArgumentException error = new IllegalArgumentException(e.getMessage());
        error.initCause(e);

        throw error;
      }

      if (value != null) {
        // Queue a non-empty result; an empty result queues nothing, so the recursive call
        // simply continues with the input that follows the expression.
        if (value.length() != 0) {
          replaceData = value;
          replaceIndex = 0;
        }
        return read();
      } else {
        // The interpolator returned null: queue the key minus its last endToken.length()
        // characters and set previousIndex to 0 so that later reads replay endToken.
        // NOTE(review): replaceData still starts with the character appended to key at the top
        // of this branch, and replaceIndex is 0, so the first beginToken character looks like
        // it is emitted twice (once here, once from the queue) - confirm this is intended.
        previousIndex = 0;
        replaceData = key.substring(0, key.length() - this.endToken.length());
        replaceIndex = 0;
        return this.beginToken.charAt(0);
      }
    }

    // Ordinary character (or -1 at end of stream): pass through unchanged.
    return ch;
  }