Ejemplo n.º 1
2
  /**
   * Load the directed graph from the given file
   *
   * @param fileName the file to load the cart from
   * @param featDefinition the feature definition
   * @param dummy unused, just here for compatibility with the FeatureFileIndexer.
   * @throws IOException , {@link MaryConfigurationException} if a problem occurs while loading
   */
  public DirectedGraph load(InputStream inStream) throws IOException, MaryConfigurationException {
    BufferedInputStream buffInStream = new BufferedInputStream(inStream);
    assert buffInStream.markSupported();
    buffInStream.mark(10000);
    // open the CART-File and read the header
    DataInput raf = new DataInputStream(buffInStream);

    MaryHeader maryHeader = new MaryHeader(raf);
    if (!maryHeader.hasCurrentVersion()) {
      throw new IOException("Wrong version of database file");
    }
    if (maryHeader.getType() != MaryHeader.DIRECTED_GRAPH) {
      if (maryHeader.getType() == MaryHeader.CARTS) {
        buffInStream.reset();
        return new MaryCARTReader().loadFromStream(buffInStream);
      } else {
        throw new IOException("Not a directed graph file");
      }
    }

    // Read properties
    short propDataLength = raf.readShort();
    Properties props;
    if (propDataLength == 0) {
      props = null;
    } else {
      byte[] propsData = new byte[propDataLength];
      raf.readFully(propsData);
      ByteArrayInputStream bais = new ByteArrayInputStream(propsData);
      props = new Properties();
      props.load(bais);
      bais.close();
    }

    // Read the feature definition
    FeatureDefinition featureDefinition = new FeatureDefinition(raf);

    // read the decision nodes
    int numDecNodes = raf.readInt(); // number of decision nodes

    // First we need to read all nodes into memory, then we can link them properly
    // in terms of parent/child.
    DecisionNode[] dns = new DecisionNode[numDecNodes];
    int[][] childIndexes = new int[numDecNodes][];
    for (int i = 0; i < numDecNodes; i++) {
      // read one decision node
      int featureNameIndex = raf.readInt();
      int nodeTypeNr = raf.readInt();
      DecisionNode.Type nodeType = DecisionNode.Type.values()[nodeTypeNr];
      int numChildren = 2; // for binary nodes
      switch (nodeType) {
        case BinaryByteDecisionNode:
          int criterion = raf.readInt();
          dns[i] =
              new DecisionNode.BinaryByteDecisionNode(
                  featureNameIndex, (byte) criterion, featureDefinition);
          break;
        case BinaryShortDecisionNode:
          criterion = raf.readInt();
          dns[i] =
              new DecisionNode.BinaryShortDecisionNode(
                  featureNameIndex, (short) criterion, featureDefinition);
          break;
        case BinaryFloatDecisionNode:
          float floatCriterion = raf.readFloat();
          dns[i] =
              new DecisionNode.BinaryFloatDecisionNode(
                  featureNameIndex, floatCriterion, featureDefinition);
          break;
        case ByteDecisionNode:
          numChildren = raf.readInt();
          if (featureDefinition.getNumberOfValues(featureNameIndex) != numChildren) {
            throw new IOException(
                "Inconsistent cart file: feature "
                    + featureDefinition.getFeatureName(featureNameIndex)
                    + " should have "
                    + featureDefinition.getNumberOfValues(featureNameIndex)
                    + " values, but decision node "
                    + i
                    + " has only "
                    + numChildren
                    + " child nodes");
          }
          dns[i] =
              new DecisionNode.ByteDecisionNode(featureNameIndex, numChildren, featureDefinition);
          break;
        case ShortDecisionNode:
          numChildren = raf.readInt();
          if (featureDefinition.getNumberOfValues(featureNameIndex) != numChildren) {
            throw new IOException(
                "Inconsistent cart file: feature "
                    + featureDefinition.getFeatureName(featureNameIndex)
                    + " should have "
                    + featureDefinition.getNumberOfValues(featureNameIndex)
                    + " values, but decision node "
                    + i
                    + " has only "
                    + numChildren
                    + " child nodes");
          }
          dns[i] =
              new DecisionNode.ShortDecisionNode(featureNameIndex, numChildren, featureDefinition);
      }
      dns[i].setUniqueDecisionNodeId(i + 1);
      // now read the children, indexes only:
      childIndexes[i] = new int[numChildren];
      for (int k = 0; k < numChildren; k++) {
        childIndexes[i][k] = raf.readInt();
      }
    }

    // read the leaves
    int numLeafNodes = raf.readInt(); // number of leaves, it does not include empty leaves
    LeafNode[] lns = new LeafNode[numLeafNodes];

    for (int j = 0; j < numLeafNodes; j++) {
      // read one leaf node
      int leafTypeNr = raf.readInt();
      LeafNode.LeafType leafNodeType = LeafNode.LeafType.values()[leafTypeNr];
      switch (leafNodeType) {
        case IntArrayLeafNode:
          int numData = raf.readInt();
          int[] data = new int[numData];
          for (int d = 0; d < numData; d++) {
            data[d] = raf.readInt();
          }
          lns[j] = new LeafNode.IntArrayLeafNode(data);
          break;
        case FloatLeafNode:
          float stddev = raf.readFloat();
          float mean = raf.readFloat();
          lns[j] = new LeafNode.FloatLeafNode(new float[] {stddev, mean});
          break;
        case IntAndFloatArrayLeafNode:
        case StringAndFloatLeafNode:
          int numPairs = raf.readInt();
          int[] ints = new int[numPairs];
          float[] floats = new float[numPairs];
          for (int d = 0; d < numPairs; d++) {
            ints[d] = raf.readInt();
            floats[d] = raf.readFloat();
          }
          if (leafNodeType == LeafNode.LeafType.IntAndFloatArrayLeafNode)
            lns[j] = new LeafNode.IntAndFloatArrayLeafNode(ints, floats);
          else lns[j] = new LeafNode.StringAndFloatLeafNode(ints, floats);
          break;
        case FeatureVectorLeafNode:
          throw new IllegalArgumentException(
              "Reading feature vector leaf nodes is not yet implemented");
        case PdfLeafNode:
          throw new IllegalArgumentException("Reading pdf leaf nodes is not yet implemented");
      }
      lns[j].setUniqueLeafId(j + 1);
    }

    // Graph nodes
    int numDirectedGraphNodes = raf.readInt();
    DirectedGraphNode[] graphNodes = new DirectedGraphNode[numDirectedGraphNodes];
    int[] dgnLeafIndices = new int[numDirectedGraphNodes];
    int[] dgnDecIndices = new int[numDirectedGraphNodes];
    for (int g = 0; g < numDirectedGraphNodes; g++) {
      graphNodes[g] = new DirectedGraphNode(null, null);
      graphNodes[g].setUniqueGraphNodeID(g + 1);
      dgnLeafIndices[g] = raf.readInt();
      dgnDecIndices[g] = raf.readInt();
    }

    // Now, link up the decision nodes with their daughters
    for (int i = 0; i < numDecNodes; i++) {
      // System.out.print(dns[i]+" "+dns[i].getFeatureName()+" ");
      for (int k = 0; k < childIndexes[i].length; k++) {
        Node child = childIndexToNode(childIndexes[i][k], dns, lns, graphNodes);
        dns[i].addDaughter(child);
        // System.out.print(" "+dns[i].getDaughter(k));
      }
      // System.out.println();
    }
    // And link up directed graph nodes
    for (int g = 0; g < numDirectedGraphNodes; g++) {
      Node leaf = childIndexToNode(dgnLeafIndices[g], dns, lns, graphNodes);
      graphNodes[g].setLeafNode(leaf);
      Node dec = childIndexToNode(dgnDecIndices[g], dns, lns, graphNodes);
      if (dec != null && !dec.isDecisionNode())
        throw new IllegalArgumentException("Only decision nodes allowed, read " + dec.getClass());
      graphNodes[g].setDecisionNode((DecisionNode) dec);
      // System.out.println("Graph node "+(g+1)+", leaf: "+Integer.toHexString(dgnLeafIndices[g])+",
      // "+leaf+" -- dec: "+Integer.toHexString(dgnDecIndices[g])+", "+dec);
    }

    Node rootNode;
    if (graphNodes.length > 0) {
      rootNode = graphNodes[0];
    } else if (dns.length > 0) {
      rootNode = dns[0];
      // CART behaviour, not sure if this is needed:
      // Now count all data once, so that getNumberOfData()
      // will return the correct figure.
      ((DecisionNode) rootNode).countData();
    } else if (lns.length > 0) {
      rootNode = lns[0]; // single-leaf tree...
    } else {
      rootNode = null;
    }

    // set the rootNode as the rootNode of cart
    return new DirectedGraph(rootNode, featureDefinition, props);
  }
Ejemplo n.º 2
0
  private void printDecisionNodes(DirectedGraph graph, DataOutput out, PrintWriter pw)
      throws IOException {
    for (DecisionNode decNode : graph.getDecisionNodes()) {
      int id = decNode.getUniqueDecisionNodeId();
      String nodeDefinition = decNode.getNodeDefinition();
      int featureIndex = decNode.getFeatureIndex();
      DecisionNode.Type nodeType = decNode.getDecisionNodeType();

      if (out != null) {
        // dump in binary form to output
        out.writeInt(featureIndex);
        out.writeInt(nodeType.ordinal());
        // Now, questionValue, which depends on nodeType
        switch (nodeType) {
          case BinaryByteDecisionNode:
            out.writeInt(((BinaryByteDecisionNode) decNode).getCriterionValueAsByte());
            assert decNode.getNumberOfDaugthers() == 2;
            break;
          case BinaryShortDecisionNode:
            out.writeInt(((BinaryShortDecisionNode) decNode).getCriterionValueAsShort());
            assert decNode.getNumberOfDaugthers() == 2;
            break;
          case BinaryFloatDecisionNode:
            out.writeFloat(((BinaryFloatDecisionNode) decNode).getCriterionValueAsFloat());
            assert decNode.getNumberOfDaugthers() == 2;
            break;
          case ByteDecisionNode:
          case ShortDecisionNode:
            out.writeInt(decNode.getNumberOfDaugthers());
        }

        // The child nodes
        for (int i = 0, n = decNode.getNumberOfDaugthers(); i < n; i++) {
          Node daughter = decNode.getDaughter(i);
          if (daughter == null) {
            out.writeInt(0);
          } else if (daughter.isDecisionNode()) {
            int daughterID = ((DecisionNode) daughter).getUniqueDecisionNodeId();
            // Mark as decision node:
            daughterID |= DirectedGraphReader.DECISIONNODE << 30;
            out.writeInt(daughterID);
          } else if (daughter.isLeafNode()) {
            int daughterID = ((LeafNode) daughter).getUniqueLeafId();
            // Mark as leaf node:
            if (daughterID != 0) daughterID |= DirectedGraphReader.LEAFNODE << 30;
            out.writeInt(daughterID);
          } else if (daughter.isDirectedGraphNode()) {
            int daughterID = ((DirectedGraphNode) daughter).getUniqueGraphNodeID();
            // Mark as directed graph node:
            if (daughterID != 0) daughterID |= DirectedGraphReader.DIRECTEDGRAPHNODE << 30;
            out.writeInt(daughterID);
          }
        }
      }
      if (pw != null) {
        // dump to print writer
        StringBuilder strNode = new StringBuilder("-" + id + " " + nodeDefinition);
        for (int i = 0, n = decNode.getNumberOfDaugthers(); i < n; i++) {
          strNode.append(" ");
          Node daughter = decNode.getDaughter(i);
          if (daughter == null) {
            strNode.append("0");
          } else if (daughter.isDecisionNode()) {
            int daughterID = ((DecisionNode) daughter).getUniqueDecisionNodeId();
            strNode.append("-").append(daughterID);
            out.writeInt(daughterID);
          } else if (daughter.isLeafNode()) {
            int daughterID = ((LeafNode) daughter).getUniqueLeafId();
            if (daughterID == 0) strNode.append("0");
            else strNode.append("id").append(daughterID);
          } else if (daughter.isDirectedGraphNode()) {
            int daughterID = ((DirectedGraphNode) daughter).getUniqueGraphNodeID();
            if (daughterID == 0) strNode.append("0");
            else strNode.append("DGN").append(daughterID);
          }
        }
        pw.println(strNode.toString());
      }
    }
  }