Beispiel #1
0
  /**
   * Calculates the BIC-style local score of variable {@code i} given {@code parents} in a simple
   * linear SEM model, working from the covariance matrix only.
   *
   * <p>The residual variance of {@code i} regressed on its parents is computed as {@code var(i) -
   * cov(parents, i)' * inverse(cov(parents, parents)) * cov(parents, i)}. The returned score is
   * {@code -n * ln(residualVariance) - c * k * ln(n)}, where {@code n} is the sample size, {@code
   * c} is the penalty discount and {@code k = p * (p + 1) / 2 + p} for {@code p} parents.
   *
   * @param i index of the scored variable in the covariance matrix.
   * @param parents indices of the parent variables in the covariance matrix.
   * @return the score, or {@code Double.NaN} when the residual variance is not positive.
   * @throws RuntimeException wrapping any exception raised while computing the score (for
   *     example a failed matrix inversion).
   */
  private double localSemScore(int i, int[] parents) {
    try {
      ICovarianceMatrix cov = getCovMatrix();
      double varianceY = cov.getValue(i, i);
      int n = sampleSize();
      int p = parents.length;
      int k = (p * (p + 1)) / 2 + p;

      TetradMatrix covxx = cov.getSelection(parents, parents);
      TetradMatrix covxxInv = covxx.inverse();
      TetradVector covxy = cov.getSelection(parents, new int[] {i}).getColumn(0);
      TetradVector b = covxxInv.times(covxy);
      double residualVariance = varianceY - covxy.dotProduct(b);

      // A non-positive residual variance has no valid logarithm. Reject it unconditionally;
      // the verbose flag only controls whether the problem is reported. (Previously the guard
      // was skipped when not verbose, so a zero residual variance produced a +Infinity score.)
      if (residualVariance <= 0) {
        if (verbose) {
          out.println(
              "Nonpositive residual varianceY: resVar / varianceY = "
                  + (residualVariance / varianceY));
        }
        return Double.NaN;
      }

      double c = getPenaltyDiscount();

      // BIC with the complexity penalty scaled by the penalty discount.
      return -n * Math.log(residualVariance) - c * k * Math.log(n);
    } catch (Exception e) {
      // The cause is preserved in the rethrown exception, so no separate stack trace is printed.
      throw new RuntimeException("Could not compute the local SEM score for variable " + i, e);
    }
  }
Beispiel #2
0
  /**
   * Scores {@code y} given {@code parents} according to the currently selected {@code score}
   * type.
   *
   * <p>The Anderson-Darling and absolute-value types delegate to their own scoring methods. The
   * kurtosis, skew and fifth-moment types return the absolute value of the corresponding
   * statistic of the residual of {@code y} given {@code parents}.
   *
   * @param y the node being scored.
   * @param parents the candidate parents of {@code y}.
   * @return the score for the selected score type.
   * @throws IllegalStateException if the selected score type is none of the handled ones.
   */
  private double score(Node y, List<Node> parents) {
    // Score types that have a dedicated scoring routine.
    if (score == Score.andersonDarling) {
      return andersonDarlingPASquareStar(y, parents);
    }

    if (score == Score.absoluteValue) {
      return localScoreA(y, parents);
    }

    // Remaining score types are moment statistics of the residual, reported in absolute value.
    double statistic;

    if (score == Score.kurtosis) {
      statistic = StatUtils.kurtosis(residual(y, parents));
    } else if (score == Score.skew) {
      statistic = StatUtils.skewness(residual(y, parents));
    } else if (score == Score.fifthMoment) {
      statistic = StatUtils.standardizedFifthMoment(residual(y, parents));
    } else {
      throw new IllegalStateException();
    }

    return Math.abs(statistic);
  }
 /**
  * Returns all subsets of the given nodes, including the empty set and the full set.
  *
  * <p>The subset at position {@code m} of the result contains {@code nodes.get(k)} exactly when
  * bit {@code k} of {@code m} is set, so the result has {@code 2^nodes.size()} entries.
  *
  * @param nodes the nodes to build subsets from.
  * @return a list of all subsets of {@code nodes}.
  * @throws IllegalArgumentException if {@code nodes} has more than 30 elements, since the number
  *     of subsets would no longer fit in an {@code int} (and the result previously came back
  *     silently incomplete).
  */
 public static List<Set<Node>> powerSet(List<Node> nodes) {
   int size = nodes.size();
   if (size > 30) {
     throw new IllegalArgumentException(
         "Cannot enumerate the power set of " + size + " nodes; at most 30 are supported.");
   }

   int total = 1 << size;
   List<Set<Node>> subsets = new ArrayList<Set<Node>>(total);

   for (int mask = 0; mask < total; mask++) {
     Set<Node> subset = new HashSet<Node>();

     // Walk the set bits of mask from least significant upward; bit k selects nodes.get(k).
     for (int bits = mask, index = 0; bits != 0; bits >>>= 1, index++) {
       if ((bits & 1) != 0) {
         subset.add(nodes.get(index));
       }
     }

     subsets.add(subset);
   }

   return subsets;
 }
  /**
   * Checks whether some conditioning set makes {@code x} and {@code z} independent, drawing the
   * set from the nodes adjacent to {@code x} or {@code z} (other than {@code x} and {@code z}
   * themselves).
   *
   * <p>Conditioning sets are tried in order of increasing size. A set is skipped if it contains
   * {@code y} or if {@code test.determines(set, y)} is true.
   *
   * @param x one endpoint of the triple.
   * @param y the middle node of the triple.
   * @param z the other endpoint of the triple.
   * @param test the independence test to consult.
   * @param graph the graph supplying the adjacencies.
   * @param depth the maximum conditioning set size; -1 is replaced by 1000. The size is also
   *     capped at the number of candidate nodes.
   * @return true as soon as a qualifying set makes {@code x} and {@code z} independent, false if
   *     none does.
   */
  public static boolean existsLocalSepsetWithoutDet(
      Node x, Node y, Node z, IndependenceTest test, Graph graph, int depth) {
    Set<Node> neighborhood = new HashSet<Node>(graph.getAdjacentNodes(x));
    neighborhood.addAll(graph.getAdjacentNodes(z));
    neighborhood.remove(x);
    neighborhood.remove(z);

    List<Node> candidates = new LinkedList<Node>(neighborhood);
    TetradLogger.getInstance()
        .log("adjacencies", "Adjacents for " + x + "--" + y + "--" + z + " = " + candidates);

    // The cap at candidates.size() guarantees every subset size tried below is feasible.
    int maxSize = Math.min(depth == -1 ? 1000 : depth, candidates.size());

    for (int size = 0; size <= maxSize; size++) {
      ChoiceGenerator generator = new ChoiceGenerator(candidates.size(), size);

      for (int[] choice = generator.next(); choice != null; choice = generator.next()) {
        List<Node> condSet = asList(choice, candidates);

        // Only sets that neither contain y nor determine y are eligible.
        boolean eligible = !condSet.contains(y) && !test.determines(condSet, y);

        if (eligible && test.isIndependent(x, z, condSet)) {
          return true;
        }
      }
    }

    return false;
  }
  /**
   * Classifies the triple {@code x--y--z} by looking at which conditioning sets make {@code x}
   * and {@code z} independent.
   *
   * <p>Conditioning sets are drawn first from the nodes adjacent to {@code x} (excluding {@code
   * z}) and then from the nodes adjacent to {@code z} (excluding {@code x}). Every set up to the
   * depth limit is tested; the search does not stop early.
   *
   * @param x one endpoint of the triple.
   * @param y the middle node of the triple.
   * @param z the other endpoint of the triple.
   * @param test the independence test to consult.
   * @param depth the maximum conditioning set size; -1 is replaced by 1000. The size is also
   *     capped at the number of candidate nodes.
   * @param graph the graph supplying the adjacencies.
   * @return {@code NONCOLLIDER} if separating sets were found and all of them contain {@code
   *     y}; {@code COLLIDER} if separating sets were found and none of them contains {@code y};
   *     {@code AMBIGUOUS} if both kinds were found or no separating set was found.
   */
  public static CpcTripleType getCpcTripleType(
      Node x, Node y, Node z, IndependenceTest test, int depth, Graph graph) {
    // found[0]: some separating set contains y; found[1]: some separating set does not.
    boolean[] found = new boolean[2];

    recordSepsetKinds(x, y, z, adjacentsWithout(graph, x, z), test, depth, found);
    recordSepsetKinds(x, y, z, adjacentsWithout(graph, z, x), test, depth, found);

    boolean existsSepsetContainingY = found[0];
    boolean existsSepsetNotContainingY = found[1];

    if (existsSepsetContainingY == existsSepsetNotContainingY) {
      return CpcTripleType.AMBIGUOUS;
    } else if (!existsSepsetNotContainingY) {
      return CpcTripleType.NONCOLLIDER;
    } else {
      return CpcTripleType.COLLIDER;
    }
  }

  /** Returns the distinct nodes adjacent to {@code node} in {@code graph}, minus {@code excluded}. */
  private static List<Node> adjacentsWithout(Graph graph, Node node, Node excluded) {
    Set<Node> adjacents = new HashSet<Node>(graph.getAdjacentNodes(node));
    adjacents.remove(excluded);
    return new LinkedList<Node>(adjacents);
  }

  /**
   * Tests x and z for independence given every subset of {@code candidates} up to the depth
   * limit, setting {@code found[0]} when a separating set contains y and {@code found[1]} when
   * one does not.
   */
  private static void recordSepsetKinds(
      Node x,
      Node y,
      Node z,
      List<Node> candidates,
      IndependenceTest test,
      int depth,
      boolean[] found) {
    TetradLogger.getInstance()
        .log("adjacencies", "Adjacents for " + x + "--" + y + "--" + z + " = " + candidates);

    int maxSize = Math.min(depth == -1 ? 1000 : depth, candidates.size());

    for (int size = 0; size <= maxSize; size++) {
      ChoiceGenerator generator = new ChoiceGenerator(candidates.size(), size);
      int[] choice;

      while ((choice = generator.next()) != null) {
        List<Node> condSet = GraphUtils.asList(choice, candidates);

        if (test.isIndependent(x, z, condSet)) {
          if (condSet.contains(y)) {
            found[0] = true;
          } else {
            found[1] = true;
          }
        }
      }
    }
  }
Beispiel #6
0
  /**
   * Scores {@code node} given {@code parents} by how far the regression residuals are from
   * Gaussian, accumulated over the data sets.
   *
   * <p>For each data set, {@code node} is regressed on {@code parents}, the residuals are
   * standardized, and the mean of their absolute values is compared with {@code sqrt(2 / pi)},
   * the expected absolute value of a standard normal variable. The squared differences are
   * summed and divided by the number of data sets. Data sets whose residuals contain a NaN
   * contribute nothing to the sum.
   *
   * @param node the node being scored; looked up by name in {@code variables}.
   * @param parents the candidate parents; each is looked up by name in {@code variables}.
   * @return the summed squared differences divided by {@code dataSets.size()}.
   */
  private double localScoreB(Node node, List<Node> parents) {
    Node target = getVariable(variables, node.getName());
    List<Node> regressors = new ArrayList<Node>();

    for (Node parent : parents) {
      regressors.add(getVariable(variables, parent.getName()));
    }

    double score = 0.0;

    for (int m = 0; m < dataSets.size(); m++) {
      RegressionResult result = regressions.get(m).regress(target, regressors);
      TetradVector residuals = result.getResiduals();

      // Skip data sets whose regression produced any NaN residual.
      boolean hasNaN = false;

      for (int h = 0; h < residuals.size(); h++) {
        if (Double.isNaN(residuals.get(h))) {
          hasNaN = true;
          break;
        }
      }

      if (hasNaN) {
        continue;
      }

      DoubleArrayList values = new DoubleArrayList(residuals.toArray());
      double mean = Descriptive.mean(values);
      double std =
          Descriptive.standardDeviation(
              Descriptive.variance(
                  values.size(), Descriptive.sum(values), Descriptive.sumOfSquares(values)));

      // Absolute values of the standardized residuals, built in a single pass.
      double[] absStandardized = new double[values.size()];

      for (int k = 0; k < absStandardized.length; k++) {
        absStandardized[k] = Math.abs((values.get(k) - mean) / std);
      }

      double meanAbs = Descriptive.mean(new DoubleArrayList(absStandardized));
      double diff = meanAbs - Math.sqrt(2.0 / Math.PI);
      score += diff * diff;
    }

    // NOTE(review): the divisor counts every data set, including those skipped for NaN
    // residuals above - confirm whether only the contributing data sets should be counted.
    return score / dataSets.size();
  }
Beispiel #7
0
  /**
   * Constructs a new standardized SEM IM from the parameters of the given SEM IM.
   *
   * @param im the SEM IM to standardize. Its graph must not contain a directed cycle.
   * @param initialization {@code CALCULATE_FROM_SEM} if the standardized values are to be
   *     calculated directly from the given SEM IM; otherwise data is simulated from the given
   *     SEM IM, standardized, and a model is estimated from it.
   * @throws IllegalArgumentException if the graph of the model contains a directed cycle.
   */
  public StandardizedSemIm(SemIm im, Initialization initialization) {
    this.semPm = new SemPm(im.getSemPm());
    this.semGraph = new SemGraph(semPm.getGraph());
    semGraph.setShowErrorTerms(true);

    if (semGraph.existsDirectedCycle()) {
      throw new IllegalArgumentException("The cyclic case is not handled.");
    }

    if (initialization == Initialization.CALCULATE_FROM_SEM) {
      // Derive the standardized parameters directly from the original ones.
      edgeParameters = new HashMap<Edge, Double>();

      List<Node> variableNodes = im.getVariableNodes();
      TetradMatrix implied = im.getImplCovar(true);

      for (Parameter param : im.getSemPm().getParameters()) {
        ParamType type = param.getType();

        if (type == ParamType.COEF) {
          Node from = param.getNodeA();
          Node to = param.getNodeB();
          int fromIndex = variableNodes.indexOf(from);
          int toIndex = variableNodes.indexOf(to);

          // Rescale the coefficient by the ratio of the implied standard deviations.
          double fromStd = Math.sqrt(implied.get(fromIndex, fromIndex));
          double toStd = Math.sqrt(implied.get(toIndex, toIndex));
          double rescaled = (fromStd / toStd) * im.getEdgeCoef(from, to);

          edgeParameters.put(Edges.directedEdge(from, to), rescaled);
        } else if (type == ParamType.COVAR) {
          Node first = param.getNodeA();
          Node second = param.getNodeB();
          Node firstError = semGraph.getExogenous(first);
          Node secondError = semGraph.getExogenous(second);

          // Error covariance divided by the product of the error standard deviations.
          double correlation =
              im.getErrCovar(first, second)
                  / Math.sqrt(im.getErrVar(first) * im.getErrVar(second));

          edgeParameters.put(Edges.bidirectedEdge(firstError, secondError), correlation);
        }
      }
    } else {
      // Estimate the standardized parameters from standardized data simulated from the
      // original model.
      DataSet simulated = im.simulateData(1000, false);
      TetradMatrix standardizedMatrix = DataUtils.standardizeData(simulated.getDoubleData());
      DataSet standardizedData =
          ColtDataSet.makeData(simulated.getVariables(), standardizedMatrix);

      SemIm estimated = new SemEstimator(standardizedData, im.getSemPm()).estimate();

      edgeParameters = new HashMap<Edge, Double>();

      for (Parameter param : estimated.getSemPm().getParameters()) {
        ParamType type = param.getType();

        if (type == ParamType.COEF) {
          Node from = param.getNodeA();
          Node to = param.getNodeB();

          edgeParameters.put(Edges.directedEdge(from, to), estimated.getEdgeCoef(from, to));
        } else if (type == ParamType.COVAR) {
          Node first = param.getNodeA();
          Node second = param.getNodeB();
          Node firstError = semGraph.getExogenous(first);
          Node secondError = semGraph.getExogenous(second);

          // NOTE(review): unlike the branch above, this value is negated, and it is read from
          // the original model (im) rather than from the estimated one - confirm both are
          // intended.
          double correlation =
              -im.getErrCovar(first, second)
                  / Math.sqrt(im.getErrVar(first) * im.getErrVar(second));

          edgeParameters.put(Edges.bidirectedEdge(firstError, secondError), correlation);
        }
      }
    }

    this.measuredNodes = Collections.unmodifiableList(semPm.getMeasuredNodes());
  }