Example #1
0
  /**
   * Samples a time series (a vector) of length {@code n} from the DBN described by a DTOM, using
   * forward sampling. Based on BNetStochastic.generate(...). Used (perhaps amongst other things)
   * during the calculation of (approximate) KL divergence between models.
   *
   * <p>Row 0 is drawn from the first-time-slice parameters (learned with {@code learnModelT0}, no
   * temporal arcs). Every later row is drawn from the second-time-slice parameters (learned with
   * {@code learnModel}, including temporal arcs) conditioned on the row immediately before it.
   *
   * @param rand RNG to use
   * @param dtom DTOM, parameters of which to generate data from (must have caseInfo set in DTOM)
   * @param n length of time series to generate from parameters; must not be negative
   * @return Time series sampled/generated from the DTOM, with {@code n} rows and the same column
   *     labels and column types as {@code dtom.caseInfo.data}
   * @throws IllegalArgumentException if {@code n} is negative
   * @throws RuntimeException if learning the parameters of a node fails (the underlying
   *     LearnerException is attached as the cause)
   */
  public static Value.Vector generateTimeSeriesDTOM(Random rand, DTOM dtom, int n) {
    if (n < 0) {
      throw new IllegalArgumentException("Length of time series must not be negative: " + n);
    }

    // Number of nodes and the total ordering of the nodes (used for forward sampling)
    int numNodes = dtom.getNumNodes();
    int[] order = dtom.getTotalOrderCopy();

    // Type info (labels and per-column types) for generated data is same as original data
    Value.Vector origData = dtom.caseInfo.data;
    Type.Structured dataType = (Type.Structured) ((Type.Vector) origData.t).elt;
    String[] name = dataType.labels;

    // Get the parameters for each node (parameters for first time slice, and second time slice)
    Value.Vector[] paramsT0 = new Value.Vector[numNodes];
    Value.Vector[] paramsT1 = new Value.Vector[numNodes];
    for (int i = 0; i < numNodes; i++) {
      DNode node = (DNode) dtom.getNode(i);
      try {
        // Learn parameters - T1 (inc. temporal arcs)
        Value.Structured model = node.learnModel(dtom.caseInfo.mmlModelLearner, dtom.caseInfo.data);
        paramsT1[i] = (Value.Vector) model.cmpnt(2);

        // Learn parameters - T0 (no temporal arcs)
        Value.Structured modelT0 =
            node.learnModelT0(dtom.caseInfo.mmlModelLearner, dtom.caseInfo.data);
        paramsT0[i] = (Value.Vector) modelT0.cmpnt(2);
      } catch (LearnerException e) {
        // Keep the original exception as the cause so its stack trace is not lost
        throw new RuntimeException("Error learning models. " + e, e);
      }
    }

    // Determine the arity of each node: (Using a very inelegant method...)
    int[] arity = new int[numNodes];
    for (int i = 0; i < numNodes; i++) {
      Type.Symbolic sType = (Type.Symbolic) dataType.cmpnts[i];
      arity[i] = NeticaFn.makeValidNeticaNames(sType.ids, true).length;
    }

    // Assignments are stored column-wise: newData[variable][row]
    int[][] newData = new int[numNodes][n];
    if (n > 0) {
      // FIRST time slice: sampled from the T0 parameters, no temporal parents
      int[] current = sampleSliceDTOM(rand, dtom, order, arity, paramsT0, null);
      for (int lineNum = 0; lineNum < n; lineNum++) {
        // Record the current slice BEFORE sampling its successor, so that consecutive rows are
        // always exactly one transition apart and no sampled slice is skipped.
        for (int j = 0; j < numNodes; j++) { // j is variable number
          newData[j][lineNum] = current[j];
        }
        if (lineNum + 1 < n) {
          // Later time slices: sampled from the T1 parameters given the slice just recorded
          current = sampleSliceDTOM(rand, dtom, order, arity, paramsT1, current);
        }
      }
    }

    // Now, combine type and value (i.e. assignments) together for each variable:
    Value.Vector[] vecArray = new Value.Vector[numNodes];
    for (int i = 0; i < numNodes; i++) {
      vecArray[i] =
          new VectorFN.FastDiscreteVector(newData[i], (Type.Symbolic) dataType.cmpnts[i]);
    }
    // And create the overall data structure:
    Value.Structured vecStruct = new Value.DefStructured(vecArray, name);
    return new VectorFN.MultiCol(vecStruct);
  }

  /**
   * Forward-samples the assignments of one time slice, visiting nodes in total order so that
   * intraslice parents are always sampled before their children.
   *
   * @param rand RNG to use
   * @param dtom DTOM supplying the nodes and their parent sets
   * @param order total ordering of the node indices
   * @param arity number of states of each node, indexed by node
   * @param params parameters of each node, indexed by node; component 1 of each entry is indexed
   *     by parent assignment
   * @param previous assignments of the preceding time slice, used for the temporal (interslice)
   *     parents; {@code null} when sampling the first slice, in which case temporal parents are
   *     not consulted
   * @return the sampled assignment of every node, indexed by node
   */
  private static int[] sampleSliceDTOM(
      Random rand, DTOM dtom, int[] order, int[] arity, Value.Vector[] params, int[] previous) {
    int[] sampled = new int[arity.length];
    for (int i = 0; i < arity.length; i++) {
      int nodeIdx = order[i]; // ith node in total order
      DNode currNode = (DNode) dtom.getNode(nodeIdx);
      Value.Vector currParams = params[nodeIdx].cmpnt(1);

      // Index of the distribution to sample from; only one element (index 0) if no parents.
      // NOTE(review): whether getNumParents() counts temporal parents is not visible from here -
      // confirm against DNode.
      int index = 0;
      if (currNode.getNumParents() != 0) {
        // Parameters are in order of [0,0,0], [0,0,1], [0,0,2], ..., [A,B,C], with the parents
        // laid out as [intraslice_parents, temporal_parents]
        int[] intraslice = currNode.getParentCopy();
        int[] temporal = (previous == null) ? new int[0] : currNode.getTemporalParentCopy();

        int[] assignment = new int[intraslice.length + temporal.length];
        int[] ar = new int[assignment.length];
        for (int z = 0; z < intraslice.length; z++) {
          assignment[z] = sampled[intraslice[z]];
          ar[z] = arity[intraslice[z]];
        }
        for (int z = 0; z < temporal.length; z++) {
          assignment[intraslice.length + z] = previous[temporal[z]];
          ar[intraslice.length + z] = arity[temporal[z]];
        }
        index = assignmentToIndexReverse(assignment, ar);
      }

      // Contains the actual probability values for the current assignment of parents
      Value.Structured vals = (Value.Structured) currParams.elt(index);
      sampled[nodeIdx] = sampleDiscreteDTOM(rand, vals, arity[nodeIdx]);
    }
    return sampled;
  }

  /**
   * Draws one state from a discrete distribution by walking its cumulative probabilities.
   *
   * @param rand RNG to use
   * @param vals structure whose first {@code numStates} components are the state probabilities
   * @param numStates number of states to consider
   * @return index of the sampled state
   */
  private static int sampleDiscreteDTOM(Random rand, Value.Structured vals, int numStates) {
    double rv = rand.nextDouble(); // Random value b/w 0 and 1
    double cumProb = 0.0;
    int lastPossible = 0;
    for (int idx = 0; idx < numStates; idx++) { // i.e. loop through each value
      double p = vals.doubleCmpnt(idx);
      if (p > 0.0) {
        lastPossible = idx;
      }
      cumProb += p;
      if (rv < cumProb) {
        return idx;
      }
    }
    // Only reached when the probabilities sum to slightly less than rv (floating-point
    // rounding): fall back to the last state with non-zero probability rather than to state 0.
    return lastPossible;
  }