/**
 * Builds a regression model for the given data.
 *
 * @param data the training data to be used for generating the linear regression function
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

  if (!m_checksTurnedOff) {
    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();
  }

  // Preprocess instances
  if (!m_checksTurnedOff) {
    m_TransformFilter = new NominalToBinary();
    m_TransformFilter.setInputFormat(data);
    data = Filter.useFilter(data, m_TransformFilter);
    m_MissingFilter = new ReplaceMissingValues();
    m_MissingFilter.setInputFormat(data);
    data = Filter.useFilter(data, m_MissingFilter);
    data.deleteWithMissingClass();
  } else {
    m_TransformFilter = null;
    m_MissingFilter = null;
  }

  m_ClassIndex = data.classIndex();
  m_TransformedData = data;

  // Turn all attributes on for a start
  m_SelectedAttributes = new boolean[data.numAttributes()];
  for (int i = 0; i < data.numAttributes(); i++) {
    if (i != m_ClassIndex) {
      m_SelectedAttributes[i] = true;
    }
  }
  m_Coefficients = null;

  // Compute means and standard deviations
  m_Means = new double[data.numAttributes()];
  m_StdDevs = new double[data.numAttributes()];
  for (int j = 0; j < data.numAttributes(); j++) {
    if (j != data.classIndex()) {
      m_Means[j] = data.meanOrMode(j);
      m_StdDevs[j] = Math.sqrt(data.variance(j));
      if (m_StdDevs[j] == 0) {
        // Attribute is constant: drop it from the model
        m_SelectedAttributes[j] = false;
      }
    }
  }

  m_ClassStdDev = Math.sqrt(data.variance(m_TransformedData.classIndex()));
  m_ClassMean = data.meanOrMode(m_TransformedData.classIndex());

  // Perform the regression
  findBestModel();

  // Save memory
  m_TransformedData = new Instances(data, 0);
}
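// Usage sketch (not from the original source): driving the method above, assuming
// it belongs to Weka's LinearRegression classifier. The class choice, the file
// name "train.arff", and the last-attribute class index are illustrative assumptions.
import java.io.BufferedReader;
import java.io.FileReader;
import weka.classifiers.functions.LinearRegression;
import weka.core.Instances;

public class BuildSketch {
  public static void main(String[] args) throws Exception {
    Instances train = new Instances(new BufferedReader(new FileReader("train.arff")));
    train.setClassIndex(train.numAttributes() - 1); // assume class is the last attribute
    LinearRegression model = new LinearRegression();
    model.buildClassifier(train);
    System.out.println(model.classifyInstance(train.instance(0)));
  }
}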
/**
 * Compute and store statistics required for generating artificial data.
 *
 * @param data training instances
 * @exception Exception if statistics could not be calculated successfully
 */
protected void computeStats(Instances data) throws Exception {
  int numAttributes = data.numAttributes();
  m_AttributeStats = new Vector(numAttributes); // used to map attributes to their stats

  for (int j = 0; j < numAttributes; j++) {
    if (data.attribute(j).isNominal()) {
      // Compute the probability of occurrence of each distinct value
      int[] nomCounts = (data.attributeStats(j)).nominalCounts;
      double[] counts = new double[nomCounts.length];
      if (counts.length < 2)
        throw new Exception("Nominal attribute has less than two distinct values!");

      // Perform Laplace smoothing
      for (int i = 0; i < counts.length; i++)
        counts[i] = nomCounts[i] + 1;
      Utils.normalize(counts);
      double[] stats = new double[counts.length - 1];
      stats[0] = counts[0];
      // Calculate cumulative probabilities
      for (int i = 1; i < stats.length; i++)
        stats[i] = stats[i - 1] + counts[i];
      m_AttributeStats.add(j, stats);
    } else if (data.attribute(j).isNumeric()) {
      // Get mean and standard deviation from the training data
      double[] stats = new double[2];
      stats[0] = data.meanOrMode(j);
      stats[1] = Math.sqrt(data.variance(j));
      m_AttributeStats.add(j, stats);
    } else
      System.err.println("Decorate can only handle numeric and nominal values.");
  }
}
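// Sketch (not part of the original class): drawing an artificial nominal value
// from the cumulative-probability table built above. Only the first k-1 cumulative
// bounds are stored, so a draw past all bounds maps to the last value. The helper
// name sampleNominalIndex is hypothetical.
static int sampleNominalIndex(double[] cumulative, java.util.Random rand) {
  double r = rand.nextDouble();
  for (int i = 0; i < cumulative.length; i++)
    if (r < cumulative[i])
      return i;
  // r fell beyond the stored bounds: the last value gets the remaining mass
  return cumulative.length;
}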
/**
 * Computes the classification for the given test exemplar.
 *
 * @param ex the given test exemplar
 * @return the classification
 * @throws Exception if the exemplar could not be classified successfully
 */
public double classifyInstance(Instance ex) throws Exception {
  // Instance ex = new Exemplar(e);
  Instances exi = ex.relationalValue(1);
  double[] n = new double[m_Dimension];
  double[] xBar = new double[m_Dimension];
  for (int i = 0; i < exi.numAttributes(); i++)
    xBar[i] = exi.meanOrMode(i);

  for (int w = 0, t = 0; w < m_Dimension; w++, t++) {
    // if((t==m_ClassIndex) || (t==m_IdIndex))
    //   t++;
    // Effective count per dimension: sum of weights where attribute t is present
    for (int u = 0; u < exi.numInstances(); u++)
      if (!exi.instance(u).isMissing(t))
        n[w] += exi.instance(u).weight();
  }

  double logOdds = likelihoodRatio(n, xBar);
  return (logOdds > m_Cutoff) ? 1 : 0;
}
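// Sketch (assumption, not from the source): the call ex.relationalValue(1) above
// implies Weka's standard multi-instance layout -- bag ID at index 0, a relational
// attribute holding the bag's instances at index 1, and the class attribute last.
// The helper name describeBag is hypothetical.
static void describeBag(weka.core.Instance exemplar) {
  weka.core.Instances bag = exemplar.relationalValue(1);
  System.out.println("bag id   : " + exemplar.stringValue(0));
  System.out.println("bag size : " + bag.numInstances());
  System.out.println("class    : " + exemplar.classValue());
  for (int i = 0; i < bag.numAttributes(); i++)
    System.out.println("mean(" + i + ") = " + bag.meanOrMode(i));
}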
/**
 * Calculate metric value.
 *
 * @param mlData multi-label dataset for which the metric is calculated
 * @return value of the metric
 */
public double calculate(MultiLabelInstances mlData) {
  Instances instances = mlData.getDataSet();
  int nInstances = mlData.getNumInstances();

  double avg;
  double var2;
  double var4;
  double val;
  int nNumeric = 0;
  double mean = 0;

  Set<Attribute> attributesSet = mlData.getFeatureAttributes();

  for (Attribute att : attributesSet) {
    if (att.isNumeric()) {
      nNumeric++;
      avg = instances.meanOrMode(att);
      var2 = 0;
      var4 = 0;

      // Accumulate second and fourth central moments
      for (Instance inst : instances) {
        val = inst.value(att);
        var2 += Math.pow(val - avg, 2);
        var4 += Math.pow(val - avg, 4);
      }

      // Excess kurtosis, then the bias-corrected sample version
      double kurtosis = (nInstances * var4 / Math.pow(var2, 2)) - 3;
      double sampleKurtosis = (kurtosis * (nInstances + 1) + 6) * (nInstances - 1)
          / ((nInstances - 2) * (nInstances - 3));
      mean += sampleKurtosis;
    }
  }

  if (nNumeric > 0) {
    mean = mean / nNumeric;
  } else {
    mean = Double.NaN;
  }

  this.value = mean;
  return value;
}
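// Standalone numeric check of the two kurtosis formulas above. For the sample
// {1,2,3,4,5}: excess kurtosis g2 = -1.3 and the bias-corrected G2 = -1.2.
public class KurtosisSketch {
  public static void main(String[] args) {
    double[] x = {1, 2, 3, 4, 5};
    int n = x.length;
    double avg = 0;
    for (double v : x) avg += v;
    avg /= n;
    double var2 = 0, var4 = 0;
    for (double v : x) {
      var2 += Math.pow(v - avg, 2); // second central moment (unnormalized)
      var4 += Math.pow(v - avg, 4); // fourth central moment (unnormalized)
    }
    double g2 = n * var4 / (var2 * var2) - 3;                           // -1.3
    double G2 = (g2 * (n + 1) + 6) * (n - 1) / ((n - 2.0) * (n - 3.0)); // -1.2
    System.out.println("g2=" + g2 + ", G2=" + G2);
  }
}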
/**
 * Signify that this batch of input to the filter is finished. If the filter requires all
 * instances prior to filtering, output() may now be called to retrieve the filtered instances.
 *
 * @return true if there are instances pending output
 * @throws IllegalStateException if no input structure has been defined
 */
public boolean batchFinished() {
  if (getInputFormat() == null)
    throw new IllegalStateException("No input instance format defined");

  if (m_Means == null) {
    Instances input = getInputFormat();
    m_Means = new double[input.numAttributes()];
    for (int i = 0; i < input.numAttributes(); i++) {
      if (input.attribute(i).isNumeric() && (input.classIndex() != i)) {
        m_Means[i] = input.meanOrMode(i);
      }
    }

    // Convert pending input instances
    for (int i = 0; i < input.numInstances(); i++)
      convertInstance(input.instance(i));
  }

  // Free memory
  flushInput();
  m_NewBatch = true;
  return (numPendingOutput() != 0);
}
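// Context sketch (not from the original class): the standard Weka batch-filter
// protocol that drives the method above; Filter.useFilter(data, filter) wraps the
// same sequence. The helper name runBatch is hypothetical.
static weka.core.Instances runBatch(weka.filters.Filter filter, weka.core.Instances data)
    throws Exception {
  filter.setInputFormat(data);        // declare the input structure
  for (int i = 0; i < data.numInstances(); i++)
    filter.input(data.instance(i));   // queue this batch's instances
  filter.batchFinished();             // triggers the mean computation above
  weka.core.Instances out = filter.getOutputFormat();
  weka.core.Instance processed;
  while ((processed = filter.output()) != null)
    out.add(processed);               // collect the converted instances
  return out;
}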
/**
 * Computes the distribution for a given exemplar.
 *
 * @param ex the exemplar for which distribution is computed
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance ex) throws Exception {
  double[] distribution = new double[2];
  Instances exi = ex.relationalValue(1);
  double[] n = new double[m_Dimension];
  double[] xBar = new double[m_Dimension];
  for (int i = 0; i < exi.numAttributes(); i++)
    xBar[i] = exi.meanOrMode(i);

  for (int w = 0, t = 0; w < m_Dimension; w++, t++) {
    for (int u = 0; u < exi.numInstances(); u++)
      if (!exi.instance(u).isMissing(t))
        n[w] += exi.instance(u).weight();
  }

  double logOdds = likelihoodRatio(n, xBar);

  // The returned logOdds value has been divided by m_Dimension to keep
  // Math.exp(logOdds) from getting too large or too small, which could
  // otherwise pin the distribution at a fixed value (1 or 0).
  distribution[0] = 1 / (1 + Math.exp(logOdds)); // Prob. for class 0 (negative)
  distribution[1] = 1 - distribution[0];

  return distribution;
}
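// Numeric check of the log-odds -> probability mapping above: positive log-odds
// shift mass toward class 1, and logOdds = 0 yields 0.5 / 0.5.
double logOdds = 2.0;
double pNeg = 1 / (1 + Math.exp(logOdds)); // ~0.119 for class 0
double pPos = 1 - pNeg;                    // ~0.881 for class 1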
/**
 * Builds the classifier from the given training exemplars.
 *
 * @param exs the training exemplars
 * @throws Exception if the model cannot be built properly
 */
public void buildClassifier(Instances exs) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(exs);

  // remove instances with missing class
  exs = new Instances(exs);
  exs.deleteWithMissingClass();

  int numegs = exs.numInstances();
  m_Dimension = exs.attribute(1).relation().numAttributes();
  Instances pos = new Instances(exs, 0), neg = new Instances(exs, 0);

  // Divide into positive and negative bags
  for (int u = 0; u < numegs; u++) {
    Instance example = exs.instance(u);
    if (example.classValue() == 1)
      pos.add(example);
    else
      neg.add(example);
  }

  int pnum = pos.numInstances(), nnum = neg.numInstances();

  m_MeanP = new double[pnum][m_Dimension];
  m_VarianceP = new double[pnum][m_Dimension];
  m_SumP = new double[pnum][m_Dimension];
  m_MeanN = new double[nnum][m_Dimension];
  m_VarianceN = new double[nnum][m_Dimension];
  m_SumN = new double[nnum][m_Dimension];
  m_ParamsP = new double[4 * m_Dimension];
  m_ParamsN = new double[4 * m_Dimension];

  // Estimation of the parameters: as the start value for search
  double[] pSumVal = new double[m_Dimension], // for m
      nSumVal = new double[m_Dimension];
  double[] maxVarsP = new double[m_Dimension], // for a
      maxVarsN = new double[m_Dimension];
  // Mean of sample variances: for b, b=a/E(\sigma^2)+2
  double[] varMeanP = new double[m_Dimension], varMeanN = new double[m_Dimension];
  // Variances of sample means: for w, w=E[var(\mu)]/E[\sigma^2]
  double[] meanVarP = new double[m_Dimension], meanVarN = new double[m_Dimension];
  // number of exemplars without all values missing
  double[] numExsP = new double[m_Dimension], numExsN = new double[m_Dimension];

  // Extract metadata from both positive and negative bags
  for (int v = 0; v < pnum; v++) {
    /*Exemplar px = pos.exemplar(v);
      m_MeanP[v] = px.meanOrMode();
      m_VarianceP[v] = px.variance();
      Instances pxi = px.getInstances();
    */
    Instances pxi = pos.instance(v).relationalValue(1);
    for (int k = 0; k < pxi.numAttributes(); k++) {
      m_MeanP[v][k] = pxi.meanOrMode(k);
      m_VarianceP[v][k] = pxi.variance(k);
    }

    for (int w = 0, t = 0; w < m_Dimension; w++, t++) {
      // if((t==m_ClassIndex) || (t==m_IdIndex))
      //   t++;
      if (!Double.isNaN(m_MeanP[v][w])) {
        for (int u = 0; u < pxi.numInstances(); u++) {
          Instance ins = pxi.instance(u);
          if (!ins.isMissing(t))
            m_SumP[v][w] += ins.weight();
        }
        numExsP[w]++;
        pSumVal[w] += m_MeanP[v][w];
        meanVarP[w] += m_MeanP[v][w] * m_MeanP[v][w];
        if (maxVarsP[w] < m_VarianceP[v][w])
          maxVarsP[w] = m_VarianceP[v][w];
        varMeanP[w] += m_VarianceP[v][w];
        m_VarianceP[v][w] *= (m_SumP[v][w] - 1.0);
        if (m_VarianceP[v][w] < 0.0)
          m_VarianceP[v][w] = 0.0;
      }
    }
  }

  for (int v = 0; v < nnum; v++) {
    /*Exemplar nx = neg.exemplar(v);
      m_MeanN[v] = nx.meanOrMode();
      m_VarianceN[v] = nx.variance();
      Instances nxi = nx.getInstances();
    */
    Instances nxi = neg.instance(v).relationalValue(1);
    for (int k = 0; k < nxi.numAttributes(); k++) {
      m_MeanN[v][k] = nxi.meanOrMode(k);
      m_VarianceN[v][k] = nxi.variance(k);
    }

    for (int w = 0, t = 0; w < m_Dimension; w++, t++) {
      // if((t==m_ClassIndex) || (t==m_IdIndex))
      //   t++;
      if (!Double.isNaN(m_MeanN[v][w])) {
        for (int u = 0; u < nxi.numInstances(); u++)
          if (!nxi.instance(u).isMissing(t))
            m_SumN[v][w] += nxi.instance(u).weight();
        numExsN[w]++;
        nSumVal[w] += m_MeanN[v][w];
        meanVarN[w] += m_MeanN[v][w] * m_MeanN[v][w];
        if (maxVarsN[w] < m_VarianceN[v][w])
          maxVarsN[w] = m_VarianceN[v][w];
        varMeanN[w] += m_VarianceN[v][w];
        m_VarianceN[v][w] *= (m_SumN[v][w] - 1.0);
        if (m_VarianceN[v][w] < 0.0)
          m_VarianceN[v][w] = 0.0;
      }
    }
  }

  for (int w = 0; w < m_Dimension; w++) {
    pSumVal[w] /= numExsP[w];
    nSumVal[w] /= numExsN[w];
    if (numExsP[w] > 1)
      meanVarP[w] = meanVarP[w] / (numExsP[w] - 1.0)
          - pSumVal[w] * pSumVal[w] * numExsP[w] / (numExsP[w] - 1.0);
    if (numExsN[w] > 1)
      meanVarN[w] = meanVarN[w] / (numExsN[w] - 1.0)
          - nSumVal[w] * nSumVal[w] * numExsN[w] / (numExsN[w] - 1.0);
    varMeanP[w] /= numExsP[w];
    varMeanN[w] /= numExsN[w];
  }

  // Bounds and parameter values for each run
  double[][] bounds = new double[2][4];
  double[] pThisParam = new double[4], nThisParam = new double[4];

  // Initial values for parameters
  double a, b, w, m;

  // Optimize for one dimension
  for (int x = 0; x < m_Dimension; x++) {
    if (getDebug())
      System.err.println("\n\n!!!!!!!!!!!!!!!!!!!!!!???Dimension #" + x);

    // Positive exemplars: first run
    a = (maxVarsP[x] > ZERO) ? maxVarsP[x] : 1.0;
    if (varMeanP[x] <= ZERO)
      varMeanP[x] = ZERO; // modified by LinDong (09/2005)
    b = a / varMeanP[x] + 2.0; // a/(b-2) = E(\sigma^2)
    w = meanVarP[x] / varMeanP[x]; // E[var(\mu)] = w*E[\sigma^2]
    if (w <= ZERO)
      w = 1.0;
    m = pSumVal[x];
    pThisParam[0] = a; // a
    pThisParam[1] = b; // b
    pThisParam[2] = w; // w
    pThisParam[3] = m; // m

    // Negative exemplars: first run
    a = (maxVarsN[x] > ZERO) ? maxVarsN[x] : 1.0;
    if (varMeanN[x] <= ZERO)
      varMeanN[x] = ZERO; // modified by LinDong (09/2005)
    b = a / varMeanN[x] + 2.0; // a/(b-2) = E(\sigma^2)
    w = meanVarN[x] / varMeanN[x]; // E[var(\mu)] = w*E[\sigma^2]
    if (w <= ZERO)
      w = 1.0;
    m = nSumVal[x];
    nThisParam[0] = a; // a
    nThisParam[1] = b; // b
    nThisParam[2] = w; // w
    nThisParam[3] = m; // m

    // Bound constraints
    bounds[0][0] = ZERO;       // a > 0
    bounds[0][1] = 2.0 + ZERO; // b > 2
    bounds[0][2] = ZERO;       // w > 0
    bounds[0][3] = Double.NaN; // m is unbounded

    for (int t = 0; t < 4; t++) {
      bounds[1][t] = Double.NaN; // no upper bounds
      m_ParamsP[4 * x + t] = pThisParam[t];
      m_ParamsN[4 * x + t] = nThisParam[t];
    }

    double pminVal = Double.MAX_VALUE, nminVal = Double.MAX_VALUE;
    Random whichEx = new Random(m_Seed);
    TLD_Optm pOp = null, nOp = null;
    boolean isRunValid = true;
    double[] sumP = new double[pnum], meanP = new double[pnum], varP = new double[pnum];
    double[] sumN = new double[nnum], meanN = new double[nnum], varN = new double[nnum];

    // One dimension
    for (int p = 0; p < pnum; p++) {
      sumP[p] = m_SumP[p][x];
      meanP[p] = m_MeanP[p][x];
      varP[p] = m_VarianceP[p][x];
    }
    for (int q = 0; q < nnum; q++) {
      sumN[q] = m_SumN[q][x];
      meanN[q] = m_MeanN[q][x];
      varN[q] = m_VarianceN[q][x];
    }

    for (int y = 0; y < m_Run; ) {
      if (getDebug())
        System.err.println("\n\n!!!!!!!!!!!!!!!!!!!!!!???Run #" + y);
      double thisMin;

      if (getDebug())
        System.err.println("\nPositive exemplars");
      pOp = new TLD_Optm();
      pOp.setNum(sumP);
      pOp.setSSquare(varP);
      pOp.setXBar(meanP);

      pThisParam = pOp.findArgmin(pThisParam, bounds);
      while (pThisParam == null) {
        pThisParam = pOp.getVarbValues();
        if (getDebug())
          System.err.println("!!! 200 iterations finished, not enough!");
        pThisParam = pOp.findArgmin(pThisParam, bounds);
      }

      thisMin = pOp.getMinFunction();
      if (!Double.isNaN(thisMin) && (thisMin < pminVal)) {
        pminVal = thisMin;
        for (int z = 0; z < 4; z++)
          m_ParamsP[4 * x + z] = pThisParam[z];
      }
      if (Double.isNaN(thisMin)) {
        pThisParam = new double[4];
        isRunValid = false;
      }

      if (getDebug())
        System.err.println("\nNegative exemplars");
      nOp = new TLD_Optm();
      nOp.setNum(sumN);
      nOp.setSSquare(varN);
      nOp.setXBar(meanN);

      nThisParam = nOp.findArgmin(nThisParam, bounds);
      while (nThisParam == null) {
        nThisParam = nOp.getVarbValues();
        if (getDebug())
          System.err.println("!!! 200 iterations finished, not enough!");
        nThisParam = nOp.findArgmin(nThisParam, bounds);
      }

      thisMin = nOp.getMinFunction();
      if (!Double.isNaN(thisMin) && (thisMin < nminVal)) {
        nminVal = thisMin;
        for (int z = 0; z < 4; z++)
          m_ParamsN[4 * x + z] = nThisParam[z];
      }
      if (Double.isNaN(thisMin)) {
        nThisParam = new double[4];
        isRunValid = false;
      }

      if (!isRunValid) {
        y--;
        isRunValid = true;
      }

      if (++y < m_Run) {
        // Change the initial parameters and restart
        int pone = whichEx.nextInt(pnum), // Randomly pick one pos. exemplar
            none = whichEx.nextInt(nnum);

        // Positive exemplars: next run
        while ((m_SumP[pone][x] <= 1.0) || Double.isNaN(m_MeanP[pone][x]))
          pone = whichEx.nextInt(pnum);
        a = m_VarianceP[pone][x] / (m_SumP[pone][x] - 1.0);
        if (a <= ZERO)
          a = m_ParamsN[4 * x]; // Change to negative params
        m = m_MeanP[pone][x];
        double sq = (m - m_ParamsP[4 * x + 3]) * (m - m_ParamsP[4 * x + 3]);

        b = a * m_ParamsP[4 * x + 2] / sq + 2.0; // b=a/Var+2, assuming Var=Sq/w'
        if ((b <= ZERO) || Double.isNaN(b) || Double.isInfinite(b))
          b = m_ParamsN[4 * x + 1];

        w = sq * (m_ParamsP[4 * x + 1] - 2.0) / m_ParamsP[4 * x]; // w=Sq/Var, assuming Var=a'/(b'-2)
        if ((w <= ZERO) || Double.isNaN(w) || Double.isInfinite(w))
          w = m_ParamsN[4 * x + 2];

        pThisParam[0] = a; // a
        pThisParam[1] = b; // b
        pThisParam[2] = w; // w
        pThisParam[3] = m; // m

        // Negative exemplars: next run
        while ((m_SumN[none][x] <= 1.0) || Double.isNaN(m_MeanN[none][x]))
          none = whichEx.nextInt(nnum);
        a = m_VarianceN[none][x] / (m_SumN[none][x] - 1.0);
        if (a <= ZERO)
          a = m_ParamsP[4 * x];
        m = m_MeanN[none][x];
        sq = (m - m_ParamsN[4 * x + 3]) * (m - m_ParamsN[4 * x + 3]);

        b = a * m_ParamsN[4 * x + 2] / sq + 2.0; // b=a/Var+2, assuming Var=Sq/w'
        if ((b <= ZERO) || Double.isNaN(b) || Double.isInfinite(b))
          b = m_ParamsP[4 * x + 1];

        w = sq * (m_ParamsN[4 * x + 1] - 2.0) / m_ParamsN[4 * x]; // w=Sq/Var, assuming Var=a'/(b'-2)
        if ((w <= ZERO) || Double.isNaN(w) || Double.isInfinite(w))
          w = m_ParamsP[4 * x + 2];

        nThisParam[0] = a; // a
        nThisParam[1] = b; // b
        nThisParam[2] = w; // w
        nThisParam[3] = m; // m
      }
    }
  }

  for (int x = 0, y = 0; x < m_Dimension; x++, y++) {
    // if((x==exs.classIndex()) || (x==exs.idIndex()))
    //   y++;
    a = m_ParamsP[4 * x];
    b = m_ParamsP[4 * x + 1];
    w = m_ParamsP[4 * x + 2];
    m = m_ParamsP[4 * x + 3];
    if (getDebug())
      System.err.println("\n\n???Positive: ( " + exs.attribute(1).relation().attribute(y)
          + "): a=" + a + ", b=" + b + ", w=" + w + ", m=" + m);

    a = m_ParamsN[4 * x];
    b = m_ParamsN[4 * x + 1];
    w = m_ParamsN[4 * x + 2];
    m = m_ParamsN[4 * x + 3];
    if (getDebug())
      System.err.println("???Negative: (" + exs.attribute(1).relation().attribute(y)
          + "): a=" + a + ", b=" + b + ", w=" + w + ", m=" + m);
  }

  if (m_UseEmpiricalCutOff) {
    // Find the empirical cut-off
    double[] pLogOdds = new double[pnum], nLogOdds = new double[nnum];
    for (int p = 0; p < pnum; p++)
      pLogOdds[p] = likelihoodRatio(m_SumP[p], m_MeanP[p], m_VarianceP[p]);
    for (int q = 0; q < nnum; q++)
      nLogOdds[q] = likelihoodRatio(m_SumN[q], m_MeanN[q], m_VarianceN[q]);

    // Update m_Cutoff
    findCutOff(pLogOdds, nLogOdds);
  } else
    m_Cutoff = -Math.log((double) pnum / (double) nnum);

  if (getDebug())
    System.err.println("???Cut-off=" + m_Cutoff);
}
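// Numeric illustration of the default cut-off above: with 30 positive and 70
// negative training bags, m_Cutoff = -ln(30/70) ~= 0.847, so a test bag needs
// log-odds above 0.847 to be labelled positive, compensating for the class prior.
double cutoff = -Math.log(30.0 / 70.0); // ~= 0.8473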
/**
 * Builds a simple linear regression model on the single attribute that
 * minimizes the squared error.
 *
 * @param insts the training instances
 * @throws Exception if a non-numeric attribute is encountered
 */
public void buildClassifier(Instances insts) throws Exception {

  // Compute mean of target value
  double yMean = insts.meanOrMode(insts.classIndex());

  // Choose best attribute
  double minMsq = Double.MAX_VALUE;
  m_attribute = null;
  int chosen = -1;
  double chosenSlope = Double.NaN;
  double chosenIntercept = Double.NaN;

  for (int i = 0; i < insts.numAttributes(); i++) {
    if (i != insts.classIndex()) {
      if (!insts.attribute(i).isNumeric()) {
        throw new Exception("UnivariateLinearRegression: Only numeric attributes!");
      }
      m_attribute = insts.attribute(i);

      // Compute slope and intercept
      double xMean = insts.meanOrMode(i);
      double sumWeightedXDiffSquared = 0;
      double sumWeightedYDiffSquared = 0;
      m_slope = 0;
      for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        if (!inst.isMissing(i) && !inst.classIsMissing()) {
          double xDiff = inst.value(i) - xMean;
          double yDiff = inst.classValue() - yMean;
          double weightedXDiff = inst.weight() * xDiff;
          double weightedYDiff = inst.weight() * yDiff;
          m_slope += weightedXDiff * yDiff;
          sumWeightedXDiffSquared += weightedXDiff * xDiff;
          sumWeightedYDiffSquared += weightedYDiff * yDiff;
        }
      }

      // Skip attribute if not useful
      if (sumWeightedXDiffSquared == 0) {
        continue;
      }
      double numerator = m_slope;
      m_slope /= sumWeightedXDiffSquared;
      m_intercept = yMean - m_slope * xMean;

      // Compute sum of squared errors
      double msq = sumWeightedYDiffSquared - m_slope * numerator;

      // Check whether this is the best attribute
      if (msq < minMsq) {
        minMsq = msq;
        chosen = i;
        chosenSlope = m_slope;
        chosenIntercept = m_intercept;
      }
    }
  }

  // Set parameters
  if (chosen == -1) {
    System.err.println("----- no useful attribute found");
    m_attribute = null;
    m_slope = 0;
    m_intercept = yMean;
  } else {
    m_attribute = insts.attribute(chosen);
    m_slope = chosenSlope;
    m_intercept = chosenIntercept;
  }
}
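// Standalone check of the weighted least-squares update above (all weights 1).
// For the points (1,2), (2,4), (3,6) the slope is 2 and the intercept is 0.
public class SlopeSketch {
  public static void main(String[] args) {
    double[] x = {1, 2, 3}, y = {2, 4, 6};
    double xMean = 2, yMean = 4;
    double num = 0, den = 0;
    for (int j = 0; j < x.length; j++) {
      num += (x[j] - xMean) * (y[j] - yMean); // weightedXDiff * yDiff
      den += (x[j] - xMean) * (x[j] - xMean); // weightedXDiff * xDiff
    }
    double slope = num / den;                 // 2.0
    double intercept = yMean - slope * xMean; // 0.0
    System.out.println("slope=" + slope + ", intercept=" + intercept);
  }
}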
/**
 * Generates the classifier.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the classifier has not been generated successfully
 */
public void buildClassifier(Instances data) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(data);

  // remove instances with missing class
  m_theInstances = new Instances(data);
  m_theInstances.deleteWithMissingClass();

  m_rr = new Random(1);

  if (m_theInstances.classAttribute().isNominal()) {
    // Set up class priors
    m_classPriorCounts = new double[data.classAttribute().numValues()];
    Arrays.fill(m_classPriorCounts, 1.0);
    for (int i = 0; i < data.numInstances(); i++) {
      Instance curr = data.instance(i);
      m_classPriorCounts[(int) curr.classValue()] += curr.weight();
    }
    m_classPriors = m_classPriorCounts.clone();
    Utils.normalize(m_classPriors);
  }

  setUpEvaluator();

  if (m_theInstances.classAttribute().isNumeric()) {
    m_disTransform = new weka.filters.unsupervised.attribute.Discretize();
    m_classIsNominal = false;

    // use binned discretisation if the class is numeric
    ((weka.filters.unsupervised.attribute.Discretize) m_disTransform).setBins(10);
    ((weka.filters.unsupervised.attribute.Discretize) m_disTransform).setInvertSelection(true);

    // Discretize all attributes EXCEPT the class
    String rangeList = "";
    rangeList += (m_theInstances.classIndex() + 1);
    // System.out.println("The class col: "+m_theInstances.classIndex());
    ((weka.filters.unsupervised.attribute.Discretize) m_disTransform)
        .setAttributeIndices(rangeList);
  } else {
    m_disTransform = new weka.filters.supervised.attribute.Discretize();
    ((weka.filters.supervised.attribute.Discretize) m_disTransform).setUseBetterEncoding(true);
    m_classIsNominal = true;
  }

  m_disTransform.setInputFormat(m_theInstances);
  m_theInstances = Filter.useFilter(m_theInstances, m_disTransform);

  m_numAttributes = m_theInstances.numAttributes();
  m_numInstances = m_theInstances.numInstances();
  m_majority = m_theInstances.meanOrMode(m_theInstances.classAttribute());

  // Perform the search
  int[] selected = m_search.search(m_evaluator, m_theInstances);

  m_decisionFeatures = new int[selected.length + 1];
  System.arraycopy(selected, 0, m_decisionFeatures, 0, selected.length);
  m_decisionFeatures[m_decisionFeatures.length - 1] = m_theInstances.classIndex();

  // reduce instances to selected features
  m_delTransform = new Remove();
  m_delTransform.setInvertSelection(true); // set features to keep
  m_delTransform.setAttributeIndicesArray(m_decisionFeatures);
  m_delTransform.setInputFormat(m_theInstances);
  m_dtInstances = Filter.useFilter(m_theInstances, m_delTransform);

  // reset the number of attributes
  m_numAttributes = m_dtInstances.numAttributes();

  // create hash table
  m_entries = new Hashtable((int) (m_dtInstances.numInstances() * 1.5));

  // insert instances into the hash table
  for (int i = 0; i < m_numInstances; i++) {
    Instance inst = m_dtInstances.instance(i);
    insertIntoTable(inst, null);
  }

  // Replace the global table majority with nearest neighbour?
  if (m_useIBk) {
    m_ibk = new IBk();
    m_ibk.buildClassifier(m_theInstances);
  }

  // Save memory
  if (m_saveMemory) {
    m_theInstances = new Instances(m_theInstances, 0);
    m_dtInstances = new Instances(m_dtInstances, 0);
  }
  m_evaluation = null;
}
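// Numeric sketch of the Laplace-corrected class priors set up above: counts start
// at 1, observed weights are added, then Utils.normalize divides by the sum.
// With observed class counts {3, 1}: priors = {(3+1)/6, (1+1)/6} = {0.667, 0.333}.
double[] priorCounts = {3 + 1.0, 1 + 1.0};
double sum = priorCounts[0] + priorCounts[1];
double[] priors = {priorCounts[0] / sum, priorCounts[1] / sum};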
@Override
public void buildClusterer(Instances data) throws Exception {
  reset();

  meanInstance = new DenseInstance(data.numAttributes());
  for (int i = 0; i < data.numAttributes(); i++)
    meanInstance.setValue(i, data.meanOrMode(i));
  numInstances = data.numInstances();

  kMeans.setDistanceFunction(distanceFunction);
  kMeans.setMaxIterations(maxIterations);
  // kMeans.setInitializeUsingKMeansPlusPlusMethod(initializeWithKMeansPlusPlus);
  if (initializeWithKMeansPlusPlus) {
    kMeans.setInitializationMethod(
        new weka.core.SelectedTag(SimpleKMeans.KMEANS_PLUS_PLUS, SimpleKMeans.TAGS_SELECTION));
  }

  /** step 1: iterate over all restarts and possible k values, record CH-scores */
  Random r = new Random(m_Seed);
  double meanCHs[] = new double[maxNumClusters + 1 - minNumClusters];
  double maxCHs[] = new double[maxNumClusters + 1 - minNumClusters];
  int maxSeed[] = new int[maxNumClusters + 1 - minNumClusters];

  for (int i = 0; i < restarts; i++) {
    if (printDebug)
      System.out.println("cascade> restarts: " + (i + 1) + " / " + restarts);
    for (int k = minNumClusters; k <= maxNumClusters; k++) {
      if (printDebug)
        System.out.print("cascade> k:" + k + " ");
      int seed = r.nextInt();
      kMeans.setSeed(seed);
      kMeans.setNumClusters(k);
      kMeans.buildClusterer(data);

      double ch = getCalinskiHarabasz();
      int index = k - minNumClusters;
      meanCHs[index] = (meanCHs[index] * i + ch) / (double) (i + 1);
      if (i == 0 || ch > maxCHs[index]) {
        maxCHs[index] = ch;
        maxSeed[index] = seed;
      }

      if (printDebug)
        System.out.println(" CH:" + df.format(ch)
            + " W:" + df.format(
                kMeans.getSquaredError() / (double) (numInstances - kMeans.getNumClusters()))
            + " (unweighted:" + df.format(kMeans.getSquaredError())
            + ") B:" + df.format(
                getSquaredErrorBetweenClusters() / (double) (kMeans.getNumClusters() - 1))
            + " (unweighted:" + df.format(getSquaredErrorBetweenClusters()) + ") ");
    }
  }

  if (printDebug) {
    String s = "cascade> max CH: [ ";
    for (int i = 0; i < maxSeed.length; i++)
      s += df.format(maxCHs[i]) + " ";
    System.out.println(s + "]");
  }
  String s = "cascade> mean CH: [ ";
  for (int i = 0; i < maxSeed.length; i++)
    s += df.format(meanCHs[i]) + " ";
  finalMeanCH = s + "]";
  // System.out.println(s + "]");

  /** step 2: select k with best mean CH-score; select seed for max CH score for this k */
  int bestK = -1;
  double maxCH = -1;
  for (int k = minNumClusters; k <= maxNumClusters; k++) {
    int index = k - minNumClusters;
    if (bestK == -1 || meanCHs[index] > maxCH) {
      maxCH = meanCHs[index];
      bestK = k;
    }
  }
  if (manuallySelectNumClusters) {
    int selectedK = selectKManually(meanCHs, bestK);
    if (selectedK != -1)
      bestK = selectedK;
  }
  int bestSeed = maxSeed[bestK - minNumClusters];
  finalBestK = bestK;
  finalBestSeed = bestSeed;
  // System.out.println("cascade> k (yields highest mean CH): " + bestK);
  // System.out.println("cascade> seed (highest CH for k=" + bestK + ") : " + bestSeed);

  kMeans.setSeed(bestSeed);
  kMeans.setNumClusters(bestK);
  kMeans.buildClusterer(data);
}
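// Hedged sketch: getCalinskiHarabasz() is not shown here, but the debug output
// above prints B/(k-1) and W/(n-k), which suggests the usual Calinski-Harabasz
// index. B = between-cluster squared error, W = within-cluster squared error,
// n = number of instances, k = number of clusters; larger CH = better separation.
static double calinskiHarabasz(double b, double w, int n, int k) {
  return (b / (k - 1)) / (w / (n - k));
}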
/**
 * Builds the classifier from the given training exemplars.
 *
 * @param exs the training exemplars
 * @throws Exception if the model cannot be built properly
 */
public void buildClassifier(Instances exs) throws Exception {
  // can classifier handle the data?
  getCapabilities().testWithFail(exs);

  // remove instances with missing class
  exs = new Instances(exs);
  exs.deleteWithMissingClass();

  int numegs = exs.numInstances();
  m_Dimension = exs.attribute(1).relation().numAttributes();
  m_Attribute = exs.attribute(1).relation().stringFreeStructure();
  Instances pos = new Instances(exs, 0), neg = new Instances(exs, 0);

  // Divide into two groups
  for (int u = 0; u < numegs; u++) {
    Instance example = exs.instance(u);
    if (example.classValue() == 1)
      pos.add(example);
    else
      neg.add(example);
  }
  int pnum = pos.numInstances(), nnum = neg.numInstances();

  // xBar, n
  m_MeanP = new double[pnum][m_Dimension];
  m_SumP = new double[pnum][m_Dimension];
  m_MeanN = new double[nnum][m_Dimension];
  m_SumN = new double[nnum][m_Dimension];
  // w, m
  m_ParamsP = new double[2 * m_Dimension];
  m_ParamsN = new double[2 * m_Dimension];
  // \sigma^2
  m_SgmSqP = new double[m_Dimension];
  m_SgmSqN = new double[m_Dimension];
  // S^2
  double[][] varP = new double[pnum][m_Dimension], varN = new double[nnum][m_Dimension];
  // numOfEx 'e' without all missing
  double[] effNumExP = new double[m_Dimension], effNumExN = new double[m_Dimension];
  // For the starting values
  double[] pMM = new double[m_Dimension],
      nMM = new double[m_Dimension],
      pVM = new double[m_Dimension],
      nVM = new double[m_Dimension];
  // # of exemplars with only one instance
  double[] numOneInsExsP = new double[m_Dimension], numOneInsExsN = new double[m_Dimension];
  // sum_i(1/n_i)
  double[] pInvN = new double[m_Dimension], nInvN = new double[m_Dimension];

  // Extract metadata from both positive and negative bags
  for (int v = 0; v < pnum; v++) {
    // Instance px = pos.instance(v);
    Instances pxi = pos.instance(v).relationalValue(1);
    for (int k = 0; k < pxi.numAttributes(); k++) {
      m_MeanP[v][k] = pxi.meanOrMode(k);
      varP[v][k] = pxi.variance(k);
    }

    for (int w = 0, t = 0; w < m_Dimension; w++, t++) {
      // if((t==m_ClassIndex) || (t==m_IdIndex))
      //   t++;
      if (varP[v][w] <= 0.0)
        varP[v][w] = 0.0;
      if (!Double.isNaN(m_MeanP[v][w])) {
        for (int u = 0; u < pxi.numInstances(); u++)
          if (!pxi.instance(u).isMissing(t))
            m_SumP[v][w] += pxi.instance(u).weight();

        pMM[w] += m_MeanP[v][w];
        pVM[w] += m_MeanP[v][w] * m_MeanP[v][w];
        if ((m_SumP[v][w] > 1) && (varP[v][w] > ZERO)) {
          m_SgmSqP[w] += varP[v][w] * (m_SumP[v][w] - 1.0) / m_SumP[v][w];
          // m_SgmSqP[w] += varP[v][w]*(m_SumP[v][w]-1.0);
          effNumExP[w]++; // Don't count exemplars with 1 instance
          pInvN[w] += 1.0 / m_SumP[v][w];
          // pInvN[w] += m_SumP[v][w];
        } else
          numOneInsExsP[w]++;
      }
    }
  }

  for (int v = 0; v < nnum; v++) {
    // Instance nx = neg.instance(v);
    Instances nxi = neg.instance(v).relationalValue(1);
    for (int k = 0; k < nxi.numAttributes(); k++) {
      m_MeanN[v][k] = nxi.meanOrMode(k);
      varN[v][k] = nxi.variance(k);
    }
    // Instances nxi = nx.getInstances();

    for (int w = 0, t = 0; w < m_Dimension; w++, t++) {
      // if((t==m_ClassIndex) || (t==m_IdIndex))
      //   t++;
      if (varN[v][w] <= 0.0)
        varN[v][w] = 0.0;
      if (!Double.isNaN(m_MeanN[v][w])) {
        for (int u = 0; u < nxi.numInstances(); u++)
          if (!nxi.instance(u).isMissing(t))
            m_SumN[v][w] += nxi.instance(u).weight();

        nMM[w] += m_MeanN[v][w];
        nVM[w] += m_MeanN[v][w] * m_MeanN[v][w];
        if ((m_SumN[v][w] > 1) && (varN[v][w] > ZERO)) {
          m_SgmSqN[w] += varN[v][w] * (m_SumN[v][w] - 1.0) / m_SumN[v][w];
          // m_SgmSqN[w] += varN[v][w]*(m_SumN[v][w]-1.0);
          effNumExN[w]++; // Don't count exemplars with 1 instance
          nInvN[w] += 1.0 / m_SumN[v][w];
          // nInvN[w] += m_SumN[v][w];
        } else
          numOneInsExsN[w]++;
      }
    }
  }

  // Expected \sigma^2
  /* if m_SgmSqP[u] or m_SgmSqN[u] is 0, assign 0 to sigma^2.
   * Otherwise, may cause m_SgmSqP / m_SgmSqN to be NaN.
   * Modified by Lin Dong (Sep. 2005)
   */
  for (int u = 0; u < m_Dimension; u++) {
    // For exemplars with only one instance, use avg(\sigma^2) of other exemplars
    if (m_SgmSqP[u] != 0)
      m_SgmSqP[u] /= (effNumExP[u] - pInvN[u]);
    else
      m_SgmSqP[u] = 0;
    if (m_SgmSqN[u] != 0)
      m_SgmSqN[u] /= (effNumExN[u] - nInvN[u]);
    else
      m_SgmSqN[u] = 0;
    // m_SgmSqP[u] /= (pInvN[u]-effNumExP[u]);
    // m_SgmSqN[u] /= (nInvN[u]-effNumExN[u]);

    effNumExP[u] += numOneInsExsP[u];
    effNumExN[u] += numOneInsExsN[u];
    pMM[u] /= effNumExP[u];
    nMM[u] /= effNumExN[u];
    pVM[u] = pVM[u] / (effNumExP[u] - 1.0)
        - pMM[u] * pMM[u] * effNumExP[u] / (effNumExP[u] - 1.0);
    nVM[u] = nVM[u] / (effNumExN[u] - 1.0)
        - nMM[u] * nMM[u] * effNumExN[u] / (effNumExN[u] - 1.0);
  }

  // Bounds and parameter values for each run
  double[][] bounds = new double[2][2];
  double[] pThisParam = new double[2], nThisParam = new double[2];

  // Initial values for parameters
  double w, m;
  Random whichEx = new Random(m_Seed);

  // Optimize for one dimension
  for (int x = 0; x < m_Dimension; x++) {
    // System.out.println("\n\n!!!!!!!!!!!!!!!!!!!!!!???Dimension #"+x);

    // Positive exemplars: first run
    pThisParam[0] = pVM[x]; // w
    if (pThisParam[0] <= ZERO)
      pThisParam[0] = 1.0;
    pThisParam[1] = pMM[x]; // m

    // Negative exemplars: first run
    nThisParam[0] = nVM[x]; // w
    if (nThisParam[0] <= ZERO)
      nThisParam[0] = 1.0;
    nThisParam[1] = nMM[x]; // m

    // Bound constraints
    bounds[0][0] = ZERO; // w > 0
    bounds[0][1] = Double.NaN;
    bounds[1][0] = Double.NaN;
    bounds[1][1] = Double.NaN;

    double pminVal = Double.MAX_VALUE, nminVal = Double.MAX_VALUE;
    TLDSimple_Optm pOp = null, nOp = null;
    boolean isRunValid = true;
    double[] sumP = new double[pnum], meanP = new double[pnum];
    double[] sumN = new double[nnum], meanN = new double[nnum];

    // One dimension
    for (int p = 0; p < pnum; p++) {
      sumP[p] = m_SumP[p][x];
      meanP[p] = m_MeanP[p][x];
    }
    for (int q = 0; q < nnum; q++) {
      sumN[q] = m_SumN[q][x];
      meanN[q] = m_MeanN[q][x];
    }

    for (int y = 0; y < m_Run; y++) {
      // System.out.println("\n\n!!!!!!!!!Positive exemplars: Run #"+y);
      double thisMin;
      pOp = new TLDSimple_Optm();
      pOp.setNum(sumP);
      pOp.setSgmSq(m_SgmSqP[x]);
      if (getDebug())
        System.out.println("m_SgmSqP[" + x + "]= " + m_SgmSqP[x]);
      pOp.setXBar(meanP);
      // pOp.setDebug(true);
      pThisParam = pOp.findArgmin(pThisParam, bounds);
      while (pThisParam == null) {
        pThisParam = pOp.getVarbValues();
        if (getDebug())
          System.out.println("!!! 200 iterations finished, not enough!");
        pThisParam = pOp.findArgmin(pThisParam, bounds);
      }

      thisMin = pOp.getMinFunction();
      if (!Double.isNaN(thisMin) && (thisMin < pminVal)) {
        pminVal = thisMin;
        for (int z = 0; z < 2; z++)
          m_ParamsP[2 * x + z] = pThisParam[z];
      }
      if (Double.isNaN(thisMin)) {
        pThisParam = new double[2];
        isRunValid = false;
      }
      if (!isRunValid) {
        y--;
        isRunValid = true;
      }

      // Change the initial parameters and restart
      int pone = whichEx.nextInt(pnum); // Randomly pick one pos. exemplar

      // Positive exemplars: next run
      while (Double.isNaN(m_MeanP[pone][x]))
        pone = whichEx.nextInt(pnum);
      m = m_MeanP[pone][x];
      w = (m - pThisParam[1]) * (m - pThisParam[1]);
      pThisParam[0] = w; // w
      pThisParam[1] = m; // m
    }

    for (int y = 0; y < m_Run; y++) {
      // System.out.println("\n\n!!!!!!!!!Negative exemplars: Run #"+y);
      double thisMin;
      nOp = new TLDSimple_Optm();
      nOp.setNum(sumN);
      nOp.setSgmSq(m_SgmSqN[x]);
      if (getDebug())
        System.out.println(m_SgmSqN[x]);
      nOp.setXBar(meanN);
      // nOp.setDebug(true);
      nThisParam = nOp.findArgmin(nThisParam, bounds);
      while (nThisParam == null) {
        nThisParam = nOp.getVarbValues();
        if (getDebug())
          System.out.println("!!! 200 iterations finished, not enough!");
        nThisParam = nOp.findArgmin(nThisParam, bounds);
      }

      thisMin = nOp.getMinFunction();
      if (!Double.isNaN(thisMin) && (thisMin < nminVal)) {
        nminVal = thisMin;
        for (int z = 0; z < 2; z++)
          m_ParamsN[2 * x + z] = nThisParam[z];
      }
      if (Double.isNaN(thisMin)) {
        nThisParam = new double[2];
        isRunValid = false;
      }
      if (!isRunValid) {
        y--;
        isRunValid = true;
      }

      // Change the initial parameters and restart
      int none = whichEx.nextInt(nnum); // Randomly pick one neg. exemplar

      // Negative exemplars: next run
      while (Double.isNaN(m_MeanN[none][x]))
        none = whichEx.nextInt(nnum);
      m = m_MeanN[none][x];
      w = (m - nThisParam[1]) * (m - nThisParam[1]);
      nThisParam[0] = w; // w
      nThisParam[1] = m; // m
    }
  }

  m_LkRatio = new double[m_Dimension];

  if (m_UseEmpiricalCutOff) {
    // Find the empirical cut-off
    double[] pLogOdds = new double[pnum], nLogOdds = new double[nnum];
    for (int p = 0; p < pnum; p++)
      pLogOdds[p] = likelihoodRatio(m_SumP[p], m_MeanP[p]);
    for (int q = 0; q < nnum; q++)
      nLogOdds[q] = likelihoodRatio(m_SumN[q], m_MeanN[q]);

    // Update m_Cutoff
    findCutOff(pLogOdds, nLogOdds);
  } else
    m_Cutoff = -Math.log((double) pnum / (double) nnum);

  /* for(int x=0, y=0; x<m_Dimension; x++, y++){
       if((x==exs.classIndex()) || (x==exs.idIndex()))
         y++;
       w=m_ParamsP[2*x]; m=m_ParamsP[2*x+1];
       System.err.println("\n\n???Positive: ( "+exs.attribute(y)+
           "): w="+w+", m="+m+", sgmSq="+m_SgmSqP[x]);
       w=m_ParamsN[2*x]; m=m_ParamsN[2*x+1];
       System.err.println("???Negative: ("+exs.attribute(y)+
           "): w="+w+", m="+m+", sgmSq="+m_SgmSqN[x]+
           "\nAvg. log-likelihood ratio in training data="
           +(m_LkRatio[x]/(pnum+nnum)));
     } */

  if (getDebug())
    System.err.println("\n\n???Cut-off=" + m_Cutoff);
}
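// Numeric sketch of the pooled within-bag variance computed above. Each bag
// contributes S_i^2 * (n_i-1)/n_i, and the divisor effNumEx - sum(1/n_i) equals
// sum((n_i-1)/n_i), so the estimate is a weighted mean of the bag variances.
// Two bags: S1^2 = 2.0 with n1 = 4 instances, S2^2 = 1.0 with n2 = 2.
double num = 2.0 * (4 - 1.0) / 4 + 1.0 * (2 - 1.0) / 2; // 1.5 + 0.5 = 2.0
double den = (1 - 1.0 / 4) + (1 - 1.0 / 2);             // 0.75 + 0.50 = 1.25
double sgmSq = num / den;                               // 1.6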