Example #1
0
  /**
   * Reads the configured data file into a {@code MatrixData}, splits it into one training vector
   * per attribute plus a final-class vector, and hands both to {@code learnTree} (the ID3
   * implementation) to build the decision tree.
   *
   * <p>Every column except the last is treated as an attribute, keyed by its header; the last
   * column is the final class.
   *
   * @return the root node of the learned tree; its attribute value is set to -1 to mark it as the
   *     root
   * @throws IllegalStateException if no data file has been specified or the percentage of data to
   *     learn from is negative
   */
  public TreeNode startLearning() {

    // Fail fast on bad configuration: previously the errors were only printed and the method
    // carried on into prepareMatrix with the invalid values.
    if (FileNameToRead == null) {
      System.out.println("---- Error ------");
      System.out.println("---- Please Specify test data set ------");
      throw new IllegalStateException("No data set file specified");
    }

    if (PercentageOfDataToLEarnFrom < 0) {
      System.out.println("---- Error ------");
      System.out.println("---- Please Specify %correctly ------");
      throw new IllegalStateException(
          "Percentage of data to learn from must not be negative: " + PercentageOfDataToLEarnFrom);
    }

    // Load the file contents into the matrix data type.
    MatrixData matrix = new MatrixData();
    matrix.prepareMatrix(FileNameToRead, PercentageOfDataToLEarnFrom);
    // To inspect the loaded matrix, uncomment the next line.
    // matrix.printMatrix();

    int classColumn = matrix.coloumns - 1;

    // One training vector per attribute column, keyed by the column header.
    HashMap<String, int[]> setTrainingVector = new HashMap<String, int[]>();
    for (int column = 0; column < classColumn; column++) {
      int[] trainingVector = new int[matrix.Numrows];
      matrix.fillArray(trainingVector, column);
      setTrainingVector.put(matrix.Headers.get(column), trainingVector);
    }

    // The last column holds the final class of every row.
    int[] FinalClass = new int[matrix.Numrows];
    matrix.fillArray(FinalClass, classColumn);

    TreeNode rootNode = new TreeNode();
    rootNode.setAtrvalue(-1); // -1 marks the root: it was not reached through an attribute value

    // Run the ID3 implementation to grow the tree below the root.
    learnTree(setTrainingVector, FinalClass, rootNode, matrix);
    return rootNode;
  }
Example #2
0
  /**
   * Recursively grows a decision tree below {@code node}, following the ID3 algorithm
   * (http://en.wikipedia.org/wiki/ID3_algorithm).
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>If every example has final class 0 (or every example has final class 1), {@code node}
   *       becomes a leaf with that class.
   *   <li>If at most one attribute is left, {@code node} becomes a leaf: class 0 when
   *       {@code getCountPositives(FinalClass)} is at least half of the examples, class 1
   *       otherwise.
   *   <li>Otherwise the attribute with the highest information gain is stored on {@code node}, one
   *       child branch is created per distinct value of that attribute, and the method recurses on
   *       the matrix returned by {@code matrix.splitMatrix(attribute, value)}.
   * </ol>
   *
   * <p>Throughout this method examples whose final class is 0 are counted as "positive" and all
   * others as "negative".
   *
   * @param setTrainingVector training vectors keyed by attribute name; each vector is expected to
   *     have one entry per element of {@code FinalClass}
   * @param FinalClass final class of every example
   * @param node tree node to fill in; receives either a leaf class or the split attribute, its
   *     gain, and the child branches
   * @param matrix matrix the vectors were built from; used to derive the child matrices
   */
  public void learnTree(
      HashMap<String, int[]> setTrainingVector,
      int[] FinalClass,
      TreeNode node,
      MatrixData matrix) {

    // Pure node: all examples share one final class, so this is a leaf.
    if (checkFinalClass(FinalClass, 0)) {
      node.fClass = 0;
      return;
    }
    if (checkFinalClass(FinalClass, 1)) {
      node.fClass = 1;
      return;
    }

    // One attribute left (or none at all, which used to crash further down with a
    // NullPointerException): stop splitting and pick the class by majority, ties going to 0.
    if (setTrainingVector.size() <= 1) {
      int cPos = getCountPositives(FinalClass);
      int cNeg = FinalClass.length - cPos;
      node.fClass = (cPos >= cNeg) ? 0 : 1;
      return;
    }

    double entropyS = getEntropy(FinalClass); // entropy before any split

    // Pick the attribute with the largest gain. The first attribute seen is the initial
    // candidate and only a strictly larger gain replaces it, so ties keep the earliest one.
    boolean haveCandidate = false;
    String attributeWithMaxGain = "";
    double maxGainValue = 0.0;
    ArrayList<Integer> valuesOfBestAttribute = null;

    for (Map.Entry<String, int[]> entry : setTrainingVector.entrySet()) {
      ArrayList<Integer> uniqueValues = new ArrayList<Integer>();
      double gain = gainForAttribute(entry.getValue(), FinalClass, entropyS, uniqueValues);

      if (!haveCandidate || gain > maxGainValue) {
        haveCandidate = true;
        maxGainValue = gain;
        attributeWithMaxGain = entry.getKey();
        valuesOfBestAttribute = uniqueValues;
      }
    }

    node.setAttributeName(attributeWithMaxGain);
    node.setfClass(-1); // -1: internal node, no final class of its own
    node.setGain(maxGainValue);

    // One child branch per distinct value of the chosen attribute.
    for (int branchValue : valuesOfBestAttribute) {
      TreeNode nodeChild = new TreeNode();
      nodeChild.setAtrvalue(branchValue);
      node.getBranches().add(nodeChild);

      MatrixData matrixChild = matrix.splitMatrix(attributeWithMaxGain, branchValue);
      int classColumn = matrixChild.coloumns - 1;

      // Rebuild the attribute vectors from the child matrix.
      HashMap<String, int[]> setTrainingVectorChild = new HashMap<String, int[]>();
      for (int column = 0; column < classColumn; column++) {
        int[] trainingVectorChild = new int[matrixChild.Numrows];
        matrixChild.fillArray(trainingVectorChild, column);
        setTrainingVectorChild.put(matrixChild.Headers.get(column), trainingVectorChild);
      }

      // The last column of the child matrix is its final class vector.
      int[] finalClassChild = new int[matrixChild.Numrows];
      matrixChild.fillArray(finalClassChild, classColumn);

      learnTree(setTrainingVectorChild, finalClassChild, nodeChild, matrixChild);
    }
  }

  /**
   * Computes the information gain of splitting on one attribute and collects the attribute's
   * distinct values into {@code uniqueValues} (via {@code addOnlyUnique}) as a side effect.
   */
  private double gainForAttribute(
      int[] attributeValues,
      int[] finalClass,
      double baseEntropy,
      ArrayList<Integer> uniqueValues) {
    // Per attribute value: how many examples have final class 0 ("positive") and how many do not.
    HashMap<Integer, Integer> positiveCounts = new HashMap<Integer, Integer>();
    HashMap<Integer, Integer> negativeCounts = new HashMap<Integer, Integer>();

    for (int i = 0; i < attributeValues.length; i++) {
      int value = attributeValues[i];
      addOnlyUnique(uniqueValues, value);

      HashMap<Integer, Integer> counts = (finalClass[i] == 0) ? positiveCounts : negativeCounts;
      Integer seenSoFar = counts.get(value);
      counts.put(value, seenSoFar == null ? 1 : seenSoFar + 1);
    }

    // gain = baseEntropy - sum over values of (share of examples) * (entropy within that value)
    double gain = baseEntropy;
    for (int value : uniqueValues) {
      Integer positiveCount = positiveCounts.get(value);
      Integer negativeCount = negativeCounts.get(value);
      int positives = (positiveCount == null) ? 0 : positiveCount;
      int negatives = (negativeCount == null) ? 0 : negativeCount;

      double weight = ((double) positives + negatives) / attributeValues.length;
      gain = gain - (weight * twoClassEntropy(positives, negatives));
    }
    return gain;
  }

  /** Entropy of a two-class sample, defined as 0 when either class is absent. */
  private double twoClassEntropy(int positives, int negatives) {
    // Guard the 0 * log2(0) case: with an IEEE logarithm that is 0 * -Infinity = NaN, which
    // would turn the whole gain into NaN and break the max-gain comparison.
    if (positives == 0 || negatives == 0) {
      return 0.0;
    }
    double val1 = (double) positives / (positives + negatives);
    double val2 = (double) negatives / (positives + negatives);
    return -(val1 * log2(val1)) - (val2 * log2(val2));
  }