예제 #1
0
 /**
  * Numbers the leaf nodes of the given graph consecutively, starting at 1, in the order in which
  * {@code graph.getLeafNodes()} yields them.
  *
  * @param graph
  *          the graph whose leaf nodes receive their ids
  * @return how many leaf nodes were numbered (equal to the highest id handed out, or 0 if the
  *         graph yielded no leaf nodes)
  */
 private int setUniqueLeafNodeIds(DirectedGraph graph) {
   int assigned = 0;
   for (LeafNode leafNode : graph.getLeafNodes()) {
     assigned++;
     leafNode.setUniqueLeafId(assigned);
   }
   return assigned;
 }
예제 #2
0
  /**
   * Runs one wagon job from start to finish.
   *
   * <p>Steps, in order: create the description file, dump the feature vectors, save the distance
   * matrix, execute {@code systemCall} as an external process and wait for it. If the process
   * exits with 0, the resulting CART is read from {@code cartFile} and the indices stored in its
   * leaves are replaced by the unit indices of the corresponding entries in {@code fv}. Unless
   * the system property {@code wagon.keepfiles} is {@code true}, the feature and distance files
   * are deleted afterwards (also when the process exited with a non-zero status).
   *
   * <p>The outcome is reported through the {@code success} and {@code finished} fields;
   * {@code success} is always written before {@code finished} is set on the failure paths.
   *
   * @throws RuntimeException
   *           if any step fails; the original exception is attached as the cause
   */
  public void run() {
    try {
      long startTime = System.currentTimeMillis();

      logger.debug(id + "> Creating " + descFile.getName());
      createDescFile();

      logger.debug(id + "> Dumping features to " + featFile.getName());
      dumpFeatureVectors();

      logger.debug(id + "> Dumping distance matrix to " + distFile.getName());
      binarySaveDistanceMatrix();

      logger.debug(id + "> Calling wagon as follows:");
      logger.debug(systemCall);
      Process p = Runtime.getRuntime().exec(systemCall);
      // Drain both streams of the child process while it runs.
      // read from error stream
      StreamGobbler errorGobbler = new StreamGobbler(p.getErrorStream(), id + " err");

      // read from output stream
      StreamGobbler outputGobbler = new StreamGobbler(p.getInputStream(), id + " out");
      // start reading from the streams
      errorGobbler.start();
      outputGobbler.start();
      p.waitFor();
      if (p.exitValue() != 0) {
        // Write the result before the completion flag, in case another thread polls
        // 'finished' and then reads 'success'.
        success = false;
        finished = true;
      } else {
        success = true;
        logger.debug(id + "> Wagon call took " + (System.currentTimeMillis() - startTime) + " ms");

        // read in the resulting CART
        logger.debug(id + "> Reading CART");
        BufferedReader buf = new BufferedReader(new FileReader(cartFile));
        try {
          WagonCARTReader wagonReader = new WagonCARTReader(LeafType.IntAndFloatArrayLeafNode);
          cart = new CART(wagonReader.load(buf, featureDefinition), featureDefinition);
        } finally {
          // Release the file handle even if loading the CART fails.
          buf.close();
        }

        // Fix the new cart's leaves:
        // They are currently the index numbers in featureVectors;
        // but what we need is the unit index numbers!
        for (LeafNode leaf : cart.getLeafNodes()) {
          int[] data = (int[]) leaf.getAllData();
          for (int i = 0; i < data.length; i++) {
            data[i] = fv[data[i]].getUnitIndex();
          }
        }

        logger.debug(id + "> completed in " + (System.currentTimeMillis() - startTime) + " ms");
        finished = true;
      }
      if (!Boolean.getBoolean("wagon.keepfiles")) {
        featFile.delete();
        distFile.delete();
      }

    } catch (Exception e) {
      if (e instanceof InterruptedException) {
        // Restore the interrupt status so code further up the stack can still observe it.
        Thread.currentThread().interrupt();
      }
      e.printStackTrace();
      success = false;
      finished = true;
      // Keep the original exception as the cause so the failure can be diagnosed.
      throw new RuntimeException("Exception running wagon", e);
    }
  }
예제 #3
0
 /**
  * Writes every leaf node of the graph whose unique leaf id is non-zero to the binary sink, the
  * text sink, or both.
  *
  * <p>For each leaf, the ordinal of its leaf type goes to {@code out} and
  * {@code "id<uniqueId> <type>"} goes to {@code pw}, followed by the type-specific payload.
  * Feature vector leaves are announced as int array leaves and written as the unit indices of
  * their feature vectors. In text form each leaf ends with a line break.
  *
  * @param graph
  *          the graph whose leaf nodes are written
  * @param out
  *          binary destination, or {@code null} to skip binary output
  * @param pw
  *          text destination, or {@code null} to skip text output
  * @throws IOException
  *           if writing to {@code out} fails
  * @throws IllegalArgumentException
  *           if a pdf leaf node is encountered (its type header has already been written by then)
  */
 private void printLeafNodes(DirectedGraph graph, DataOutput out, PrintWriter pw)
     throws IOException {
   final boolean binary = out != null;
   final boolean text = pw != null;
   for (LeafNode node : graph.getLeafNodes()) {
     if (node.getUniqueLeafId() == 0) {
       // empty leaf, do not write
       continue;
     }
     // Feature vector leaf nodes are saved as int array leaf nodes.
     LeafType writtenType = node.getLeafNodeType();
     if (writtenType == LeafType.FeatureVectorLeafNode) {
       writtenType = LeafType.IntArrayLeafNode;
     }
     // Header: leaf node type (binary) / id and type name (text)
     if (binary) {
       out.writeInt(writtenType.ordinal());
     }
     if (text) {
       pw.print("id" + node.getUniqueLeafId() + " " + writtenType);
     }
     // Payload: dispatch on the node's real type, not on the type written in the header.
     switch (node.getLeafNodeType()) {
       case IntArrayLeafNode: {
         int[] indices = ((IntArrayLeafNode) node).getIntData();
         // Element count first, then the elements themselves
         if (binary) {
           out.writeInt(indices.length);
         }
         if (text) {
           pw.print(" " + indices.length);
         }
         for (int index : indices) {
           if (binary) {
             out.writeInt(index);
           }
           if (text) {
             pw.print(" " + index);
           }
         }
         break;
       }
       case FloatLeafNode: {
         FloatLeafNode floatLeaf = (FloatLeafNode) node;
         float stddev = floatLeaf.getStDeviation();
         float mean = floatLeaf.getMean();
         if (binary) {
           out.writeFloat(stddev);
           out.writeFloat(mean);
         }
         if (text) {
           pw.print(" 1 " + stddev + " " + mean);
         }
         break;
       }
       case IntAndFloatArrayLeafNode:
       case StringAndFloatLeafNode: {
         IntAndFloatArrayLeafNode pairLeaf = (IntAndFloatArrayLeafNode) node;
         int[] indices = pairLeaf.getIntData();
         float[] values = pairLeaf.getFloatData();
         // Element count first, then (index, float) pairs
         if (binary) {
           out.writeInt(indices.length);
         }
         if (text) {
           pw.print(" " + indices.length);
         }
         for (int k = 0; k < indices.length; k++) {
           if (binary) {
             out.writeInt(indices[k]);
             out.writeFloat(values[k]);
           }
           if (text) {
             pw.print(" " + indices[k] + " " + values[k]);
           }
         }
         break;
       }
       case FeatureVectorLeafNode: {
         FeatureVector[] vectors = ((FeatureVectorLeafNode) node).getFeatureVectors();
         // Element count first, then the unit index of each feature vector
         if (binary) {
           out.writeInt(vectors.length);
         }
         if (text) {
           pw.print(" " + vectors.length);
         }
         for (FeatureVector vector : vectors) {
           if (binary) {
             out.writeInt(vector.getUnitIndex());
           }
           if (text) {
             pw.print(" " + vector.getUnitIndex());
           }
         }
         break;
       }
       case PdfLeafNode:
         throw new IllegalArgumentException("Writing of pdf leaf nodes not yet implemented");
     }
     if (text) {
       pw.println();
     }
   }
 }