Example #1
0
  /**
   * @param node The variable node in question.
   * @return the error node for the given node.
   */
  public Node getErrorNode(Node node) {
    if (errorNodes.contains(node)) {
      return node;
    }

    int index = variableNodes.indexOf(node);

    if (index == -1) {
      throw new NullPointerException(
          node
              + " is not a node in this model. Perhaps "
              + "it's another node with the same name.");
    }

    return errorNodes.get(index);
  }
Example #2
0
  /**
   * @return The edge coefficient matrix of the model, a la SemIm. Note that this will normally need
   *     to be transposed, since [a][b] is the edge coefficient for a-->b, not b-->a. Sorry.
   *     History. THESE ARE PARAMETERS OF THE MODEL--THE ONLY PARAMETERS.
   */
  public TetradMatrix edgeCoef() {
    List<Node> variableNodes = getVariableNodes();

    TetradMatrix edgeCoef = new TetradMatrix(variableNodes.size(), variableNodes.size());

    for (Edge edge : edgeParameters.keySet()) {
      if (Edges.isBidirectedEdge(edge)) {
        continue;
      }

      Node a = edge.getNode1();
      Node b = edge.getNode2();

      int aindex = variableNodes.indexOf(a);
      int bindex = variableNodes.indexOf(b);

      double coef = edgeParameters.get(edge);

      edgeCoef.set(aindex, bindex, coef);
    }

    return edgeCoef;
  }
Example #3
0
 private void buildIndexing(Graph graph) {
   this.hashIndices = new HashMap<Node, Integer>();
   for (Node node : graph.getNodes()) {
     this.hashIndices.put(node, variables.indexOf(node));
   }
 }
Example #4
0
  /**
   * Constructs a new standardized SEM IM from the freeParameters in the given SEM IM.
   *
   * @param im Stop asking me for these things! The given SEM IM!!!
   * @param initialization CALCULATE_FROM_SEM if the initial values will be calculated from the
   *     given SEM IM; INITIALIZE_FROM_DATA if data will be simulated from the given SEM,
   *     standardized, and estimated.
   */
  public StandardizedSemIm(SemIm im, Initialization initialization) {
    this.semPm = new SemPm(im.getSemPm());
    this.semGraph = new SemGraph(semPm.getGraph());
    semGraph.setShowErrorTerms(true);

    if (semGraph.existsDirectedCycle()) {
      throw new IllegalArgumentException("The cyclic case is not handled.");
    }

    if (initialization == Initialization.CALCULATE_FROM_SEM) {
      //         This code calculates the new coefficients directly from the old ones.
      edgeParameters = new HashMap<Edge, Double>();

      List<Node> nodes = im.getVariableNodes();
      TetradMatrix impliedCovar = im.getImplCovar(true);

      for (Parameter parameter : im.getSemPm().getParameters()) {
        if (parameter.getType() == ParamType.COEF) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          int aindex = nodes.indexOf(a);
          int bindex = nodes.indexOf(b);
          double vara = impliedCovar.get(aindex, aindex);
          double stda = Math.sqrt(vara);
          double varb = impliedCovar.get(bindex, bindex);
          double stdb = Math.sqrt(varb);
          double oldCoef = im.getEdgeCoef(a, b);
          double newCoef = (stda / stdb) * oldCoef;
          edgeParameters.put(Edges.directedEdge(a, b), newCoef);
        } else if (parameter.getType() == ParamType.COVAR) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          Node exoa = semGraph.getExogenous(a);
          Node exob = semGraph.getExogenous(b);
          double covar = im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b));
          edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar);
        }
      }
    } else {

      // This code estimates the new coefficients from simulated data from the old model.
      DataSet dataSet = im.simulateData(1000, false);
      TetradMatrix _dataSet = dataSet.getDoubleData();
      _dataSet = DataUtils.standardizeData(_dataSet);
      DataSet dataSetStandardized = ColtDataSet.makeData(dataSet.getVariables(), _dataSet);

      SemEstimator estimator = new SemEstimator(dataSetStandardized, im.getSemPm());
      SemIm imStandardized = estimator.estimate();

      edgeParameters = new HashMap<Edge, Double>();

      for (Parameter parameter : imStandardized.getSemPm().getParameters()) {
        if (parameter.getType() == ParamType.COEF) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          double coef = imStandardized.getEdgeCoef(a, b);
          edgeParameters.put(Edges.directedEdge(a, b), coef);
        } else if (parameter.getType() == ParamType.COVAR) {
          Node a = parameter.getNodeA();
          Node b = parameter.getNodeB();
          Node exoa = semGraph.getExogenous(a);
          Node exob = semGraph.getExogenous(b);
          double covar = -im.getErrCovar(a, b) / Math.sqrt(im.getErrVar(a) * im.getErrVar(b));
          edgeParameters.put(Edges.bidirectedEdge(exoa, exob), covar);
        }
      }
    }

    this.measuredNodes = Collections.unmodifiableList(semPm.getMeasuredNodes());
  }
  // Trying to optimize the search for 4-cliques a bit.
  private Set<Set<Integer>> findPureClusters2(
      List<Integer> _variables, Map<Node, Set<Node>> adjacencies) {
    System.out.println("Original variables = " + variables);

    Set<Set<Integer>> clusters = new HashSet<Set<Integer>>();
    List<Integer> allVariables = new ArrayList<Integer>();
    Set<Node> foundVariables = new HashSet<Node>();
    for (int i = 0; i < this.variables.size(); i++) allVariables.add(i);

    for (int x : _variables) {
      Node nodeX = variables.get(x);
      if (foundVariables.contains(nodeX)) continue;

      List<Node> adjX = new ArrayList<Node>(adjacencies.get(nodeX));
      adjX.removeAll(foundVariables);

      if (adjX.size() < 3) continue;

      for (Node nodeY : adjX) {
        if (foundVariables.contains(nodeY)) continue;

        List<Node> commonXY = new ArrayList<Node>(adjacencies.get(nodeY));
        commonXY.retainAll(adjX);
        commonXY.removeAll(foundVariables);

        for (Node nodeZ : commonXY) {
          if (foundVariables.contains(nodeZ)) continue;

          List<Node> commonXZ = new ArrayList<Node>(commonXY);
          commonXZ.retainAll(adjacencies.get(nodeZ));
          commonXZ.removeAll(foundVariables);

          for (Node nodeW : commonXZ) {
            if (foundVariables.contains(nodeW)) continue;

            if (!adjacencies.get(nodeY).contains(nodeW)) {
              continue;
            }

            int y = variables.indexOf(nodeY);
            int w = variables.indexOf(nodeW);
            int z = variables.indexOf(nodeZ);

            Set<Integer> cluster = quartet(x, y, z, w);

            // Note that purity needs to be assessed with respect to all of the variables in order
            // to
            // remove all latent-measure impurities between pairs of latents.
            if (pure(cluster, allVariables)) {

              O:
              for (int o : _variables) {
                if (cluster.contains(o)) continue;
                cluster.add(o);

                if (!clique(cluster, adjacencies)) {
                  cluster.remove(o);
                  continue O;
                }

                //                                if (!allVariablesDependent(cluster)) {
                //                                    cluster.remove(o);
                //                                    continue O;
                //                                }

                List<Integer> _cluster = new ArrayList<Integer>(cluster);

                ChoiceGenerator gen2 = new ChoiceGenerator(_cluster.size(), 4);
                int[] choice2;
                int count = 0;

                while ((choice2 = gen2.next()) != null) {
                  int x2 = _cluster.get(choice2[0]);
                  int y2 = _cluster.get(choice2[1]);
                  int z2 = _cluster.get(choice2[2]);
                  int w2 = _cluster.get(choice2[3]);

                  Set<Integer> quartet = quartet(x2, y2, z2, w2);

                  // Optimizes for large clusters.
                  if (quartet.contains(o)) {
                    if (++count > 2) continue O;
                  }

                  if (quartet.contains(o) && !pure(quartet, allVariables)) {
                    cluster.remove(o);
                    continue O;
                  }
                }
              }

              System.out.println(
                  "Cluster found: " + variablesForIndices(new ArrayList<Integer>(cluster)));
              clusters.add(cluster);
              foundVariables.addAll(variablesForIndices(new ArrayList<Integer>(cluster)));
            }
          }
        }
      }
    }

    return clusters;
  }
  // Finds clusters of size 4 or higher.
  private Set<Set<Integer>> findPureClusters(
      List<Integer> _variables, Map<Node, Set<Node>> adjacencies) {
    //        System.out.println("Original variables = " + variables);

    Set<Set<Integer>> clusters = new HashSet<Set<Integer>>();
    List<Integer> allVariables = new ArrayList<Integer>();
    for (int i = 0; i < this.variables.size(); i++) allVariables.add(i);

    VARIABLES:
    while (!_variables.isEmpty()) {
      if (_variables.size() < 4) break;

      for (int x : _variables) {
        Node nodeX = variables.get(x);
        List<Node> adjX = new ArrayList<Node>(adjacencies.get(nodeX));
        adjX.retainAll(variablesForIndices(new ArrayList<Integer>(_variables)));

        for (Node node : new ArrayList<Node>(adjX)) {
          if (adjacencies.get(node).size() < 3) {
            adjX.remove(node);
          }
        }

        if (adjX.size() < 3) {
          continue;
        }

        ChoiceGenerator gen = new ChoiceGenerator(adjX.size(), 3);
        int[] choice;

        while ((choice = gen.next()) != null) {
          Node nodeY = adjX.get(choice[0]);
          Node nodeZ = adjX.get(choice[1]);
          Node nodeW = adjX.get(choice[2]);

          int y = variables.indexOf(nodeY);
          int w = variables.indexOf(nodeW);
          int z = variables.indexOf(nodeZ);

          Set<Integer> cluster = quartet(x, y, z, w);

          if (!clique(cluster, adjacencies)) {
            continue;
          }

          // Note that purity needs to be assessed with respect to all of the variables in order to
          // remove all latent-measure impurities between pairs of latents.
          if (pure(cluster, allVariables)) {

            //                        Collections.shuffle(_variables);

            O:
            for (int o : _variables) {
              if (cluster.contains(o)) continue;
              cluster.add(o);
              List<Integer> _cluster = new ArrayList<Integer>(cluster);

              if (!clique(cluster, adjacencies)) {
                cluster.remove(o);
                continue O;
              }

              //                            if (!allVariablesDependent(cluster)) {
              //                                cluster.remove(o);
              //                                continue O;
              //                            }

              ChoiceGenerator gen2 = new ChoiceGenerator(_cluster.size(), 4);
              int[] choice2;
              int count = 0;

              while ((choice2 = gen2.next()) != null) {
                int x2 = _cluster.get(choice2[0]);
                int y2 = _cluster.get(choice2[1]);
                int z2 = _cluster.get(choice2[2]);
                int w2 = _cluster.get(choice2[3]);

                Set<Integer> quartet = quartet(x2, y2, z2, w2);

                // Optimizes for large clusters.
                if (quartet.contains(o)) {
                  if (++count > 50) continue O;
                }

                if (quartet.contains(o) && !pure(quartet, allVariables)) {
                  cluster.remove(o);
                  continue O;
                }
              }
            }

            System.out.println(
                "Cluster found: " + variablesForIndices(new ArrayList<Integer>(cluster)));
            clusters.add(cluster);
            _variables.removeAll(cluster);

            continue VARIABLES;
          }
        }
      }

      break;
    }

    return clusters;
  }