Пример #1
0
  /**
   * Builds the consequent text of a rule: the class whose summed weight of evidence over the
   * rule's conditions is largest, followed by that weight in square brackets.
   *
   * @param dataset data-set used to translate the winning class index into its output value
   * @param data data matrix forwarded to {@code RuleSet.computeWeightEvidence}
   * @param classData class information forwarded to {@code RuleSet.computeWeightEvidence}
   * @param infoAttr attribute information forwarded to {@code RuleSet.computeWeightEvidence}
   * @param nClases number of classes to evaluate
   * @param rule rule whose conditions are scored
   * @return the output value of the best class followed by {@code " [weight]"}
   */
  private String consecuente(
      myDataset dataset, int data[][], int classData[], int infoAttr[], int nClases, Rule rule) {

    int bestClass = 0;
    double bestWeight = Double.NEGATIVE_INFINITY;

    for (int clase = 0; clase < nClases; clase++) {
      // Accumulate the evidence that every condition of the rule gives to this class.
      double weight = 0;
      for (int c = 0; c < rule.getRule().length; c++) {
        weight +=
            RuleSet.computeWeightEvidence(data, classData, rule.getiCondition(c), clase, infoAttr);
      }
      // Strict comparison: on ties the lowest class index is kept.
      if (weight > bestWeight) {
        bestWeight = weight;
        bestClass = clase;
      }
    }

    return dataset.getOutputValue(bestClass) + " [" + Double.toString(bestWeight) + "]";
  }
Пример #2
0
  /**
   * Classifies one example with the rules stored in {@code contenedor}.
   *
   * <p>Rules are visited from the last to the first. For every rule whose conditions all match the
   * example, the class with the highest summed weight of evidence is computed; the prediction is
   * the class of the matching rule with the largest such weight, provided it is greater than zero.
   *
   * @param dataset data-set the example is read from
   * @param ex index of the example inside {@code dataset}
   * @param data data matrix forwarded to {@code RuleSet.computeWeightEvidence}; its first row
   *     fixes the number of attributes read from the example
   * @param classData class information forwarded to {@code RuleSet.computeWeightEvidence}
   * @param infoAttr attribute information forwarded to {@code RuleSet.computeWeightEvidence}
   * @param contenedor rules to try
   * @param nClases number of classes to evaluate
   * @return the output value of the predicted class, or {@code "Unclassified"} when no matching
   *     rule yields a positive weight
   */
  private String classificationOutput(
      myDataset dataset,
      int ex,
      int data[][],
      int classData[],
      int infoAttr[],
      Vector<Rule> contenedor,
      int nClases) {

    // Copy the example into a plain array; missing values are encoded as -1.
    int[] ejemplo = new int[data[0].length];
    for (int a = 0; a < ejemplo.length; a++) {
      ejemplo[a] = dataset.isMissing(ex, a) ? -1 : dataset.valueExample(ex, a);
    }

    int classPredicted = -1;
    double bestOverall = 0;
    int bestClass = 0;

    /* Search a match of the example (walking the container backwards) */
    for (int r = contenedor.size() - 1; r >= 0; r--) {
      Rule rule = contenedor.elementAt(r);

      boolean match = true;
      for (int c = 0; c < rule.getRule().length; c++) {
        if (ejemplo[rule.getiCondition(c).getAttribute()] != rule.getiCondition(c).getValue()) {
          match = false;
          break;
        }
      }
      if (!match) {
        continue;
      }

      // Best class for this rule according to the summed weight of evidence.
      double bestForRule = Double.NEGATIVE_INFINITY;
      for (int clase = 0; clase < nClases; clase++) {
        double weight = 0;
        for (int c = 0; c < rule.getRule().length; c++) {
          weight +=
              RuleSet.computeWeightEvidence(
                  data, classData, rule.getiCondition(c), clase, infoAttr);
        }
        if (weight > bestForRule) {
          bestForRule = weight;
          bestClass = clase;
        }
      }

      // Only a strictly larger (and therefore positive) weight replaces the current prediction.
      if (bestForRule > bestOverall) {
        classPredicted = bestClass;
        bestOverall = bestForRule;
      }
    }

    return classPredicted == -1 ? "Unclassified" : dataset.getOutputValue(classPredicted);
  }
Пример #3
0
  /**
   * One-point crossover applied in place to two individuals of the population.
   *
   * <p>A single cut point is drawn with {@code Randomize.Randint(0, n_genes)} and split into a
   * rule index and a position inside that rule, using {@code 3 * nAtt + 1} genes per rule (three
   * per attribute plus the class value). Both individuals are flagged as not evaluated afterwards.
   *
   * @param cr1 index of parent 1 in the population
   * @param cr2 index of parent 2 in the population
   */
  public void onePointCrossover(int cr1, int cr2) {
    final RuleSet first = poblacion[cr1];
    final RuleSet second = poblacion[cr2];

    // Each rule contributes 3 genes per attribute plus one gene for the class value.
    final int genesPerRule = 3 * nAtt + 1;
    final int cut = Randomize.Randint(0, n_genes);
    final int cutRule = cut / genesPerRule;
    final int cutGene = cut % genesPerRule;

    // The first individual takes the genes of the second from the cut point (inclusive) onwards.
    first.copyFromPointtoEnd(second, cutRule, cutGene);
    // The second individual takes the genes of the first before the cut point (exclusive).
    // NOTE(review): the first individual has already been modified at this point; if the copy
    // helpers behave as described, both individuals end up with the same genes — confirm that
    // this is intended.
    second.copyFromBegintoPoint(first, cutRule, cutGene);

    // The offspring must be evaluated again.
    first.setEvaluated(false);
    second.setEvaluated(false);
  }
Пример #4
0
  /**
   * Tells whether a rule gives positive evidence to at least one class.
   *
   * <p>For every class the weights of evidence of all the rule's conditions are added up; the rule
   * is positive when the largest of those sums is greater than zero.
   *
   * @param dataset not used by this method
   * @param data data matrix forwarded to {@code RuleSet.computeWeightEvidence}
   * @param classData class information forwarded to {@code RuleSet.computeWeightEvidence}
   * @param infoAttr attribute information forwarded to {@code RuleSet.computeWeightEvidence}
   * @param nClases number of classes to evaluate
   * @param rule rule whose conditions are scored
   * @return {@code true} if the best summed weight is strictly positive
   */
  private boolean reglaPositiva(
      myDataset dataset, int data[][], int classData[], int infoAttr[], int nClases, Rule rule) {

    double bestWeight = Double.NEGATIVE_INFINITY;

    for (int clase = 0; clase < nClases; clase++) {
      double weight = 0;
      for (int c = 0; c < rule.getRule().length; c++) {
        weight +=
            RuleSet.computeWeightEvidence(data, classData, rule.getiCondition(c), clase, infoAttr);
      }
      if (weight > bestWeight) {
        bestWeight = weight;
      }
    }

    return bestWeight > 0;
  }
Пример #5
0
 /**
  * Combines the noun-phrase (NP) patterns of every trigger into one {@code RuleSet} per trigger
  * and pattern key, and stores the combined sets whose frequency is at least 2.
  *
  * <p>For each entry of {@code rl} the rule map is initialised and, for each of the six NP pattern
  * keys, the pattern data returned by {@code getEvalRules} is folded into a fresh {@code RuleSet}:
  * counters are incremented and the maximum distances are tracked depending on whether the
  * pattern is inside a chunk, has the theme after the trigger, or has the trigger between two
  * arguments.
  *
  * @param rl rules per trigger
  * @param ruleset destination map; entries are stored under {@code trigger + i + patternKey}
  * @param i event type index; per the original comments 5 selects the theme2/Binding handling
  *     and values above 5 the cause handling
  */
 private void combiningNP(Map<String, Rules> rl, Map<String, RuleSet> ruleset, int i) {
   final String[] keys = {
     "NNNP", "VBNP", "JJNP", "NNCP", "VBCP", "JJCP",
   }; // noun, adj, vb in noun phrase
   for (Map.Entry<String, Rules> entry : rl.entrySet()) {
     String s = entry.getKey();
     Rules rule = entry.getValue();
     rule.initMap();
     // process NP chunk
     // cases to considered: PRO-TG (PRO)*
     // TG prep1 PRO1 (prep2 PRO2)*
     for (String subkey : keys) {
       List<RuleData> ls = rule.getEvalRules(subkey); // all NP pattern of current trg
       if (ls == null) {
         continue;
       }
       String rkey = s + i + subkey; // trg + type + pos(first two letters) + chunk type
       // has NP patterns
       RuleSet rs = new RuleSet();
       for (RuleData dt : ls) {
         if (dt.in_chunk) { // in chunk case
           rs.in_chunk = true;
           rs.inchunk_count += dt.count; // count number of inchunk event
           if (!dt.has_cause) { // all event type without theme2/cause
             if (dt.event1) {
               rs.ecount += dt.count;
             } else {
               rs.pcount += dt.count;
             }
           } else if (i == 5) { // theme2 PRO-TG PRO (has_cause is implied here)
             rs.t2count += dt.count;
             rs.pcount += dt.count;
             rs.dist2 = Math.max(rs.dist2, dt.dist2);
           } else if (i > 5 && dt.theme_pos) { // has cause
             rs.pcause += dt.count; // assume only pro is cause : PRO - TG Theme (Pro/Evt)
             if (dt.event1) {
               rs.ecount += dt.count;
             } else {
               rs.pcount += dt.count;
             }
             rs.dist1 = Math.max(rs.dist1, dt.dist1);
           }
         } else if (dt.theme_pos) { // for all POS : TG - prep - PRO
           // (NN && prep) or (VB/JJ)
           if (!dt.POS.startsWith("NN") || !dt.prep1.isEmpty()) {
             if (!dt.has_cause) { // all event type, no theme2 / cause
               if (i <= 5) {
                 rs.pcount += dt.count;
               } else if (dt.event1) {
                 rs.ecount += dt.count;
               } else {
                 rs.pcount += dt.count;
               }
               rs.dist1 = Math.max(rs.dist1, dt.dist1);
             } else if (dt.cause_pos
                 && !dt.prep2.isEmpty()
                 && dt.POS.startsWith("NN")) { // TG-prep1-PRO1-prep2-PRO2 ; only NNx
               if (i == 5) {
                 rs.t2count += dt.count;
                 rs.pcount += dt.count;
               } else {
                 if (dt.event1) {
                   rs.ecount += dt.count;
                 } else {
                   rs.pcount += dt.count;
                 }
                 if (dt.event2) {
                   rs.ecause += dt.count;
                 } else {
                   rs.pcause += dt.count;
                 }
               }
               rs.dist1 = Math.max(rs.dist1, dt.dist1);
               rs.dist2 = Math.max(rs.dist2, dt.dist2);
             }
           }
         } else if (i == 5
             && ((dt.has_cause && dt.cause_pos)
                 || !dt.POS.startsWith("NN"))) { // Binding: PRO1 - TG - PRO2 (theme before TG)
           rs.in_front += dt.count;
           if (!dt.prep2.isEmpty()) {
             rs.prep_2.add(dt.prep2);
           }
           if (!dt.prep1.isEmpty()) {
             rs.prep_1.add(dt.prep1);
           }
           rs.dist1 = Math.max(rs.dist1, dt.dist1);
           rs.dist2 = Math.max(rs.dist2, dt.dist2);
           rs.pcount += dt.count;
         }
       }
       rs.detected = getCountDetectedTG(s, subkey);
       // NOTE(review): this method used to keep two "order" counters that were never updated, so
       // rs.order was never cleared here. combiningVP does derive rs.order from its patterns —
       // confirm whether NP patterns need the same treatment.
       if (rs.getFreq() >= 2) {
         ruleset.put(rkey, rs);
       }
     }
   }
 }
Пример #6
0
 /**
  * Combines the {@code "VBVP"} and {@code "JJVP"} patterns of every trigger into one
  * {@code RuleSet} per trigger and pattern key, and stores the combined sets whose frequency is
  * at least 2.
  *
  * <p>Besides the theme/cause counters and maximum distances, a balance is kept over the patterns
  * with {@code verb_type == 1} and POS {@code "VBN"}: their count is added when the theme follows
  * the trigger and subtracted otherwise. A positive balance clears {@code rs.order}.
  *
  * @param rl rules per trigger
  * @param ruleset destination map; entries are stored under {@code trigger + i + patternKey}
  * @param i event type index; 5 selects the theme2 counters, larger values the event/cause
  *     counters
  */
 private void combiningVP(Map<String, Rules> rl, Map<String, RuleSet> ruleset, int i) {
   final String[] keys = {"VBVP", "JJVP"}; // pattern keys handled by this method
   for (Map.Entry<String, Rules> entry : rl.entrySet()) {
     String s = entry.getKey();
     Rules rule = entry.getValue();
     rule.initMap();
     // ** VP JJ
     for (String subkey : keys) {
       List<RuleData> ls = rule.getEvalRules(subkey); // all patterns of current trg for this key
       if (ls == null) {
         continue;
       }
       String rkey = s + i + subkey;
       RuleSet rs = new RuleSet();
       int orderBalance = 0;
       for (RuleData dt : ls) {
         if (!dt.has_cause) {
           if (i <= 5) {
             rs.pcount += dt.count;
           } else if (dt.event1) {
             rs.ecount += dt.count;
           } else {
             rs.pcount += dt.count;
           }
         } else if (i == 5) { // has theme2
           rs.t2count += dt.count;
           rs.pcount += dt.count;
         } else { // has cause
           if (dt.event1) {
             rs.ecount += dt.count;
           } else {
             rs.pcount += dt.count;
           }
           if (dt.event2) {
             rs.ecause += dt.count;
           } else {
             rs.pcause += dt.count;
           }
         }
         if (dt.verb_type == 1 && dt.POS.equals("VBN")) {
           if (dt.theme_pos) {
             orderBalance += dt.count;
           } else {
             orderBalance -= dt.count;
           }
         }
         rs.dist1 = Math.max(rs.dist1, dt.dist1);
         rs.dist2 = Math.max(rs.dist2, dt.dist2);
       }
       rs.detected = getCountDetectedTG(s, subkey);
       if (orderBalance > 0) {
         rs.order = false;
       }
       if (rs.getFreq() >= 2) {
         ruleset.put(rkey, rs);
       }
     }
   }
 }
Пример #7
0
  /**
   * It launches the algorithm.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Aborts (without exiting the JVM) when {@code somethingWrong} is set.
   *   <li>Copies the training set into integer arrays, encoding missing values as -1.
   *   <li>Collects the first-order rules (one condition each) whose adjusted residual for some
   *       class exceeds 1.96 in absolute value.
   *   <li>While at least two rules were found at the previous level, runs a steady-state genetic
   *       algorithm (linear-ranking roulette selection, crossover, mutation, replacement of the
   *       last two individuals of the sorted population) and decodes the first individual into
   *       new rules.
   *   <li>Writes the output files for {@code val} and {@code test} and appends the positive rules
   *       with their consequents to {@code outputRule}.
   * </ol>
   */
  public void execute() {
    if (somethingWrong) { // We do not execute the program
      System.err.println("An error was found, the data-set has numerical values.");
      System.err.println("Aborting the program");
      // We should not use the statement: System.exit(-1);
      return;
    }

    final double NUmax = 1.5; // used for lineal ranking
    final double NUmin = 0.5; // used for lineal ranking

    Randomize.setSeed(seed);

    nClasses = train.getnClasses();

    /* Build the nominal data information */
    int[] infoAttr = new int[train.getnInputs()];
    for (int a = 0; a < infoAttr.length; a++) {
      infoAttr[a] = train.numberValues(a);
    }

    int[][] data = new int[train.getnData()][train.getnInputs()];
    for (int e = 0; e < data.length; e++) {
      for (int a = 0; a < data[e].length; a++) {
        if (train.isMissing(e, a)) {
          data[e][a] = -1;
        } else {
          data[e][a] = train.valueExample(e, a);
        }
      }
    }

    int[] classData = new int[train.getnData()];
    for (int e = 0; e < classData.length; e++) {
      classData[e] = train.getOutputAsInteger(e);
    }

    /* Find first-order rules which result interesting */
    Vector<Rule> contenedor = new Vector<Rule>();
    Vector<Rule> conjR = new Vector<Rule>();
    // NOTE(review): the same one-element array is handed to every new Rule; this is only safe if
    // the Rule constructor copies its argument — confirm.
    Condition[] tmpCondition = new Condition[1];
    for (int c = 0; c < nClasses; c++) {
      for (int a = 0; a < infoAttr.length; a++) {
        for (int v = 0; v < infoAttr[a]; v++) {
          tmpCondition[0] = new Condition(a, v);
          Rule tmpRule = new Rule(tmpCondition);
          if (Math.abs(computeAdjustedResidual(data, classData, tmpRule, c)) > 1.96
              && !contenedor.contains(tmpRule)) {
            contenedor.add(tmpRule);
            conjR.add(tmpRule);
          }
        }
      }
    }

    // Construct the Baker selection roulette (cumulative linear-ranking probabilities)
    double[] prob = new double[popSize];
    for (int p = 0; p < popSize; p++) {
      double aux = (double) (NUmax - NUmin) * ((double) p / (popSize - 1));
      prob[p] = (double) (1.0 / (popSize)) * (NUmax - aux);
    }
    for (int p = 1; p < popSize; p++) {
      prob[p] = prob[p] + prob[p - 1];
    }

    /* Steady-State Genetic Algorithm */
    int ele = 2;
    RuleSet[] population = new RuleSet[popSize];
    while (conjR.size() >= 2) {
      int t = 0;

      System.out.println("Producing rules of level " + ele);

      for (int p = 0; p < population.length; p++) {
        population[p] = new RuleSet(conjR);
        population[p].computeFitness(data, classData, infoAttr, contenedor, nClasses);
      }

      Arrays.sort(population);

      while (t < numGenerations && !population[0].equals(population[popSize - 1])) {
        System.out.println("Generation " + t);
        t++;

        /* Baker's selection */
        double pos1 = Randomize.Rand();
        double pos2 = Randomize.Rand();
        int l;
        for (l = 0; l < popSize && prob[l] < pos1; l++) ;
        // Rounding can leave the last cumulative probability slightly below the drawn number;
        // clamp so the index never runs past the population.
        int sel1 = Math.min(l, popSize - 1);
        for (l = 0; l < popSize && prob[l] < pos2; l++) ;
        int sel2 = Math.min(l, popSize - 1);

        RuleSet hijo1 = new RuleSet(population[sel1]);
        RuleSet hijo2 = new RuleSet(population[sel2]);

        if (Randomize.Rand() < pCross) {
          RuleSet.crossover1(hijo1, hijo2);
        } else {
          RuleSet.crossover2(hijo1, hijo2);
        }

        RuleSet.mutation(hijo1, conjR, pMut, data, classData, infoAttr, contenedor, nClasses);
        RuleSet.mutation(hijo2, conjR, pMut, data, classData, infoAttr, contenedor, nClasses);

        hijo1.computeFitness(data, classData, infoAttr, contenedor, nClasses);
        hijo2.computeFitness(data, classData, infoAttr, contenedor, nClasses);

        // The offspring overwrite the last two individuals of the sorted population.
        population[popSize - 2] = new RuleSet(hijo1);
        population[popSize - 1] = new RuleSet(hijo2);

        Arrays.sort(population);
      }

      /* Decode function */
      ele++;
      conjR.removeAllElements();
      System.out.println(
          "Fitness of the best chromosome in rule level " + ele + ": " + population[0].fitness);
      for (int r = 0; r < population[0].getRuleSet().length; r++) {
        // NOTE(review): the rule index r is passed where the first-order search above passes a
        // class index — confirm this is the intended class argument.
        if (Math.abs(computeAdjustedResidual(data, classData, population[0].getRule(r), r))
            > 1.96) {
          if (validarRegla(population[0].getRule(r))
              && !contenedor.contains(population[0].getRule(r))) {
            contenedor.add(population[0].getRule(r));
            conjR.add(population[0].getRule(r));
          }
        }
      }
    }

    // Finally we should fill the training and test output files
    doOutput(this.val, this.outputTr, data, classData, infoAttr, contenedor, nClasses);
    doOutput(this.test, this.outputTst, data, classData, infoAttr, contenedor, nClasses);

    /* Print the rules obtained (last found first), keeping only the positive ones */
    for (int r = contenedor.size() - 1; r >= 0; r--) {
      if (reglaPositiva(
          this.train, data, classData, infoAttr, nClasses, contenedor.elementAt(r))) {
        Fichero.AnadirtoFichero(outputRule, contenedor.elementAt(r).toString(train));
        Fichero.AnadirtoFichero(
            outputRule,
            " -> "
                + consecuente(
                    this.train, data, classData, infoAttr, nClasses, contenedor.elementAt(r))
                + "\n");
      }
    }
    System.out.println("Algorithm Finished");
  }
Пример #8
0
  /**
   * Evaluates the rule set of the model against the given context.
   *
   * <p>All fired rules are collected into a multimap, grouped by key in firing order. The
   * criterion of the first declared rule selection method then decides, for every key, which rule
   * represents it and which value is recorded:
   *
   * <ul>
   *   <li>{@code FIRST_HIT}: the first fired rule of the key and its confidence; the first rule
   *       of the first key also becomes the entity of the result.
   *   <li>{@code WEIGHTED_SUM}: the heaviest rule of the key and the key's total weight divided
   *       by the number of all fired rules.
   *   <li>{@code WEIGHTED_MAX}: the heaviest rule of the key and its confidence.
   * </ul>
   *
   * @param context evaluation context the rules are tested against
   * @return the classification produced by {@code TargetUtil.evaluateClassification}
   * @throws InvalidFeatureException if the rule set declares no rule selection method
   * @throws UnsupportedFeatureException if a rule fired and the criterion is none of the above
   */
  private Map<FieldName, ? extends ClassificationMap<?>> evaluateRuleSet(
      ModelManagerEvaluationContext context) {
    RuleSet ruleSet = getModel().getRuleSet();

    // "If more than one method is included, the first method is used as the default method for
    // scoring"
    List<RuleSelectionMethod> selectionMethods = ruleSet.getRuleSelectionMethods();
    if (selectionMethods.isEmpty()) {
      throw new InvalidFeatureException(ruleSet);
    }
    RuleSelectionMethod selectionMethod = selectionMethods.get(0);

    // Both the ordering of keys and values is significant
    ListMultimap<String, SimpleRule> firedRules = LinkedListMultimap.create();
    for (Rule rule : ruleSet.getRules()) {
      collectFiredRules(firedRules, rule, context);
    }

    RuleClassificationMap result = new RuleClassificationMap();

    RuleSelectionMethod.Criterion criterion = selectionMethod.getCriterion();

    for (String key : firedRules.keySet()) {
      List<SimpleRule> candidates = firedRules.get(key);

      switch (criterion) {
        case FIRST_HIT:
          {
            SimpleRule firstHit = candidates.get(0);

            // The first value of the first key
            if (result.getEntity() == null) {
              result.setEntity(firstHit);
            }

            result.put(key, firstHit.getConfidence());
            break;
          }
        case WEIGHTED_SUM:
          {
            SimpleRule heaviest = null;
            double weightSum = 0;

            for (SimpleRule candidate : candidates) {
              // Strict comparison: the earliest rule wins among equally heavy ones.
              if (heaviest == null || (heaviest.getWeight() < candidate.getWeight())) {
                heaviest = candidate;
              }
              weightSum += candidate.getWeight();
            }

            result.put(heaviest, key, weightSum / firedRules.size());
            break;
          }
        case WEIGHTED_MAX:
          {
            SimpleRule heaviest = null;

            for (SimpleRule candidate : candidates) {
              if (heaviest == null || (heaviest.getWeight() < candidate.getWeight())) {
                heaviest = candidate;
              }
            }

            result.put(heaviest, key, heaviest.getConfidence());
            break;
          }
        default:
          throw new UnsupportedFeatureException(selectionMethod, criterion);
      }
    }

    return TargetUtil.evaluateClassification(result, context);
  }