/**
 * Computes the consequent of a rule: the class whose accumulated weight of evidence over all
 * of the rule's conditions is highest, formatted as "classValue [weight]".
 *
 * <p>Cleanup: the original kept dead stores ({@code classPredicted = -1} and {@code Waip = 0}
 * were unconditionally overwritten) and method-wide loop indices; both removed.
 *
 * @param dataset used only to translate the winning class index into its output value
 * @param data encoded training examples (attribute values per example)
 * @param classData class index of each training example
 * @param infoAttr number of nominal values per attribute
 * @param nClases number of classes
 * @param rule the rule whose consequent is computed
 * @return the predicted class label followed by its evidence weight in brackets
 */
private String consecuente(
    myDataset dataset, int data[][], int classData[], int infoAttr[], int nClases, Rule rule) {
  int bestClass = 0;
  double bestWeight = Double.NEGATIVE_INFINITY;
  for (int l = 0; l < nClases; l++) {
    double weight = 0;
    for (int k = 0; k < rule.getRule().length; k++) {
      weight += RuleSet.computeWeightEvidence(data, classData, rule.getiCondition(k), l, infoAttr);
    }
    if (weight > bestWeight) {
      bestWeight = weight;
      bestClass = l;
    }
  }
  return dataset.getOutputValue(bestClass) + " [" + Double.toString(bestWeight) + "]";
}
/**
 * Classifies example {@code ex} of {@code dataset} using the rule container.
 *
 * <p>The container is scanned from the last rule to the first; for every rule whose conditions
 * all match the example, the class maximizing the accumulated weight of evidence is computed,
 * and the overall prediction is the class of the matching rule with the highest such weight.
 * Because the running best ({@code Waip}) starts at 0, only matches with strictly positive
 * evidence can classify the example; otherwise "Unclassified" is returned.
 *
 * @param dataset source of the example's attribute values and output labels
 * @param ex index of the example to classify
 * @param data encoded training examples
 * @param classData class index of each training example
 * @param infoAttr number of nominal values per attribute
 * @param contenedor container of candidate rules
 * @param nClases number of classes
 * @return the predicted class label, or "Unclassified" when no rule matches with positive weight
 */
private String classificationOutput(
    myDataset dataset,
    int ex,
    int data[][],
    int classData[],
    int infoAttr[],
    Vector<Rule> contenedor,
    int nClases) {
  int j, k, l;
  boolean match;
  double tmp1, tmp2;
  int pos = 0, classPredicted;
  double Waip;
  // Encode the example's attribute values; -1 marks a missing value.
  int ejemplo[] = new int[data[0].length];
  for (j = 0; j < ejemplo.length; j++) {
    if (dataset.isMissing(ex, j)) ejemplo[j] = -1;
    else ejemplo[j] = dataset.valueExample(ex, j);
  }
  classPredicted = -1;
  Waip = 0; // starting at 0 means only positive-evidence matches can win
  /*Search a match of the example (following by the container)*/
  for (j = contenedor.size() - 1; j >= 0; j--) {
    match = true;
    // A rule matches only if every condition equals the example's encoded value
    // (a missing value, -1, can never match).
    for (k = 0; k < contenedor.elementAt(j).getRule().length && match; k++) {
      if (ejemplo[contenedor.elementAt(j).getiCondition(k).getAttribute()]
          != contenedor.elementAt(j).getiCondition(k).getValue()) {
        match = false;
      }
    }
    if (match) {
      // Find the class with the highest accumulated weight of evidence for this rule.
      tmp1 = Double.NEGATIVE_INFINITY;
      for (l = 0; l < nClases; l++) {
        tmp2 = 0;
        for (k = 0; k < contenedor.elementAt(j).getRule().length; k++) {
          tmp2 += RuleSet.computeWeightEvidence(
              data, classData, contenedor.elementAt(j).getiCondition(k), l, infoAttr);
        }
        if (tmp2 > tmp1) {
          tmp1 = tmp2;
          pos = l;
        }
      }
      // Keep the prediction of the matching rule with the strongest evidence.
      if (tmp1 > Waip) {
        classPredicted = pos;
        Waip = tmp1;
      }
    }
  }
  if (classPredicted == -1) return "Unclassified";
  return dataset.getOutputValue(classPredicted);
}
/** * One-point crossover * * @param cr1 index of parent 1 in poblation * @param cr2 index of parent 2 in poblation */ public void onePointCrossover(int cr1, int cr2) { RuleSet rule1 = poblacion[cr1]; RuleSet rule2 = poblacion[cr2]; // there are 3*number of attribute elements, plus class value in each cromosome int cutpoint = Randomize.Randint(0, n_genes); int cutpoint_rule = cutpoint / (3 * nAtt + 1); int cutpoint_variable = cutpoint % (3 * nAtt + 1); // rule1 is replaced from cutpoint (inclusive) to the end of his rule set rule1.copyFromPointtoEnd(rule2, cutpoint_rule, cutpoint_variable); // rule2 is replaced from the begining of his rule set to cutpoint (not inclusive) rule2.copyFromBegintoPoint(rule1, cutpoint_rule, cutpoint_variable); // childs must be evaluated rule1.setEvaluated(false); rule2.setEvaluated(false); }
/**
 * Reports whether a rule is "positive": its best accumulated weight of evidence over all
 * classes is strictly greater than zero.
 *
 * <p>Cleanup: the original returned {@code Waip > 0 ? true : false} through a redundant
 * intermediate variable; the boolean expression is now returned directly.
 *
 * @param dataset unused here; kept so the signature matches the sibling helpers
 * @param data encoded training examples
 * @param classData class index of each training example
 * @param infoAttr number of nominal values per attribute
 * @param nClases number of classes
 * @param rule the rule to test
 * @return true when the rule's maximal evidence weight over the classes is positive
 */
private boolean reglaPositiva(
    myDataset dataset, int data[][], int classData[], int infoAttr[], int nClases, Rule rule) {
  double bestWeight = Double.NEGATIVE_INFINITY;
  for (int l = 0; l < nClases; l++) {
    double weight = 0;
    for (int k = 0; k < rule.getRule().length; k++) {
      weight += RuleSet.computeWeightEvidence(data, classData, rule.getiCondition(k), l, infoAttr);
    }
    if (weight > bestWeight) {
      bestWeight = weight;
    }
  }
  return bestWeight > 0;
}
/**
 * Aggregates the evaluated noun-phrase (NP/CP) rule data of every trigger in {@code rl} into
 * RuleSet summaries, keyed by trigger + event-type index + sub-pattern key; summaries whose
 * total frequency is at least 2 are stored into {@code ruleset}.
 *
 * <p>NOTE(review): {@code i} appears to be an event-type index where i == 5 is a binding-like
 * type with a second theme and i &gt; 5 are types carrying a cause — inferred from the branches
 * below; confirm against the caller.
 */
private void combiningNP(Map<String, Rules> rl, Map<String, RuleSet> ruleset, int i) {
  RuleSet rs;
  String rkey;
  int order1, order2;
  Rules rule;
  List<RuleData> ls;
  String keys[] = {
    "NNNP", "VBNP", "JJNP", "NNCP", "VBCP", "JJCP",
  }; // noun, adj, vb in noun phrase
  for (String s : rl.keySet()) {
    rule = rl.get(s);
    rule.initMap();
    // process NP chunk
    // cases to considered: PRO-TG (PRO)*
    // TG prep1 PRO1 (prep2 PRO2)*
    for (String subkey : keys) {
      ls = rule.getEvalRules(subkey); // all NP pattern of current trg
      if (ls == null) {
        continue;
      }
      rkey = s + i + subkey; // trg + type + pos(first two letters) + chunk type
      // has NP patterns
      rs = new RuleSet();
      // NOTE(review): neither order1 nor order2 is ever modified in this method, so the
      // (order2 > order1) check below can never fire — verify whether updates were lost.
      order1 = 0;
      order2 = 0;
      for (RuleData dt : ls) {
        if (dt.in_chunk) { // in chunk case
          rs.in_chunk = true;
          rs.inchunk_count += dt.count; // count number of inchunk event
          if (!dt.has_cause) { // all event type without theme2/cause
            if (dt.event1) {
              rs.ecount += dt.count;
            } else {
              rs.pcount += dt.count;
            }
          } else if (i == 5 && dt.has_cause) { // theme2 PRO-TG PRO
            rs.t2count += dt.count;
            rs.pcount += dt.count;
            rs.dist2 = Math.max(rs.dist2, dt.dist2);
          } else if (i > 5 && dt.theme_pos && dt.has_cause) { // has cause
            rs.pcause += dt.count; // assume only pro is cause : PRO - TG Theme (Pro/Evt)
            if (dt.event1) {
              rs.ecount += dt.count;
            } else {
              rs.pcount += dt.count;
            }
            rs.dist1 = Math.max(rs.dist1, dt.dist1);
          }
        } else if (dt.theme_pos) { // for all POS : TG - prep - PRO
          if ((dt.POS.startsWith("NN") && !dt.prep1.isEmpty()) || !dt.POS.startsWith("NN")) {
            // (NN && prep) or (VB/JJ)
            if (!dt.has_cause) { // all event type, no theme2 / cause
              if (i <= 5) {
                rs.pcount += dt.count;
              } else {
                if (dt.event1) {
                  rs.ecount += dt.count;
                } else {
                  rs.pcount += dt.count;
                }
              }
              rs.dist1 = Math.max(rs.dist1, dt.dist1);
            } else if (dt.cause_pos && !dt.prep2.isEmpty() && dt.POS.startsWith("NN")) {
              // TG-prep1-PRO1-prep2-PRO2 ; only NNx
              if (i == 5) {
                rs.t2count += dt.count;
                rs.pcount += dt.count;
              } else {
                if (dt.event1) {
                  rs.ecount += dt.count;
                } else {
                  rs.pcount += dt.count;
                }
                if (dt.event2) {
                  rs.ecause += dt.count;
                } else {
                  rs.pcause += dt.count;
                }
              }
              rs.dist1 = Math.max(rs.dist1, dt.dist1);
              rs.dist2 = Math.max(rs.dist2, dt.dist2);
            }
          }
        } else if (i == 5
            && !dt.theme_pos
            && ((dt.has_cause && dt.cause_pos) || !dt.POS.startsWith("NN"))) {
          // Binding: PRO1 - TG - PRO2
          rs.in_front += dt.count;
          if (!dt.prep2.isEmpty()) {
            rs.prep_2.add(dt.prep2);
          }
          if (!dt.prep1.isEmpty()) {
            rs.prep_1.add(dt.prep1);
          }
          rs.dist1 = Math.max(rs.dist1, dt.dist1);
          rs.dist2 = Math.max(rs.dist2, dt.dist2);
          rs.pcount += dt.count;
          // NOTE(review): duplicate dist2 update — redundant but harmless.
          rs.dist2 = Math.max(rs.dist2, dt.dist2);
        }
      }
      rs.detected = getCountDetectedTG(s, subkey);
      if (order2 > order1) {
        rs.order = false;
      }
      // Keep only sufficiently frequent rule summaries.
      if (rs.getFreq() >= 2) {
        ruleset.put(rkey, rs);
      }
    }
  }
}
/**
 * Aggregates the evaluated verb-phrase (VP) rule data of every trigger in {@code rl} into
 * RuleSet summaries, keyed by trigger + event-type index + sub-pattern key; summaries whose
 * total frequency is at least 2 are stored into {@code ruleset}.
 *
 * <p>NOTE(review): as in combiningNP, {@code i} looks like an event-type index (i == 5 has a
 * second theme, i &gt; 5 carries a cause) — confirm against the caller.
 */
private void combiningVP(Map<String, Rules> rl, Map<String, RuleSet> ruleset, int i) {
  RuleSet rs;
  String rkey;
  int order1;
  Rules rule;
  List<RuleData> ls;
  String keys[] = {"VBVP", "JJVP"}; // noun, adj, vb in noun phrase
  for (String s : rl.keySet()) {
    rule = rl.get(s);
    rule.initMap();
    // ** VP JJ
    for (String subkey : keys) {
      ls = rule.getEvalRules(subkey); // all NP pattern of current trg
      if (ls == null) {
        continue;
      }
      rkey = s + i + subkey;
      rs = new RuleSet();
      order1 = 0;
      for (RuleData dt : ls) {
        // NOTE(review): deliberately emptied branch — the low-count skip was disabled by
        // commenting out the 'continue', so low-count items are currently still counted.
        if (dt.count < 2 && i < 5) {
          // continue;
        }
        if (!dt.has_cause) {
          if (i <= 5) {
            rs.pcount += dt.count;
          } else {
            if (dt.event1) {
              rs.ecount += dt.count;
            } else {
              rs.pcount += dt.count;
            }
          }
        } else { // has theme2/cause
          if (i == 5) {
            rs.t2count += dt.count;
            rs.pcount += dt.count;
          } else {
            if (dt.event1) {
              rs.ecount += dt.count;
            } else {
              rs.pcount += dt.count;
            }
            if (dt.event2) {
              rs.ecause += dt.count;
            } else {
              rs.pcause += dt.count;
            }
          }
        }
        // NOTE(review): the positive contribution to order1 is commented out, so order1 only
        // ever decreases and the (order1 > 0) flip below can never fire — verify intent.
        if (dt.verb_type == 1 && dt.POS.equals("VBN") && dt.theme_pos) {
          // order1 += dt.count;
        } else if (dt.verb_type == 1 && dt.POS.equals("VBN") && !dt.theme_pos) {
          order1 -= dt.count;
        }
        rs.dist1 = Math.max(rs.dist1, dt.dist1);
        rs.dist2 = Math.max(rs.dist2, dt.dist2);
      }
      rs.detected = getCountDetectedTG(s, subkey);
      if (order1 > 0) {
        rs.order = false;
      }
      // Keep only sufficiently frequent rule summaries.
      if (rs.getFreq() >= 2) {
        ruleset.put(rkey, rs);
      }
    }
  }
}
/**
 * It launches the algorithm.
 *
 * <p>Pipeline: encode the nominal training data, seed the rule container with interesting
 * first-order rules (adjusted residual beyond the 1.96 critical value), then iteratively run a
 * steady-state GA to combine rules into higher-order ones until fewer than two candidate rules
 * remain; finally write classification outputs and print the positive rules.
 */
public void execute() {
  int i, j, k, l;
  int t;
  int ele;
  double prob[];
  double aux;
  double NUmax = 1.5; // used for lineal ranking
  double NUmin = 0.5; // used for lineal ranking
  double pos1, pos2;
  int sel1, sel2;
  int data[][];
  int infoAttr[];
  int classData[];
  Vector<Rule> contenedor = new Vector<Rule>();
  Vector<Rule> conjR = new Vector<Rule>();
  Rule tmpRule;
  Condition tmpCondition[] = new Condition[1];
  RuleSet population[];
  RuleSet hijo1, hijo2;
  if (somethingWrong) { // We do not execute the program
    System.err.println("An error was found, the data-set has numerical values.");
    System.err.println("Aborting the program");
    // We should not use the statement: System.exit(-1);
  } else {
    Randomize.setSeed(seed);
    nClasses = train.getnClasses();
    /*Build the nominal data information*/
    infoAttr = new int[train.getnInputs()];
    for (i = 0; i < infoAttr.length; i++) {
      infoAttr[i] = train.numberValues(i);
    }
    // Encode the training examples; -1 marks a missing value.
    data = new int[train.getnData()][train.getnInputs()];
    for (i = 0; i < data.length; i++) {
      for (j = 0; j < data[i].length; j++) {
        if (train.isMissing(i, j)) data[i][j] = -1;
        else data[i][j] = train.valueExample(i, j);
      }
    }
    classData = new int[train.getnData()];
    for (i = 0; i < classData.length; i++) {
      classData[i] = train.getOutputAsInteger(i);
    }
    /*Find first-order rules which result interesting*/
    // A single-condition rule is kept when the magnitude of its adjusted residual for some
    // class exceeds 1.96 (the 5% two-tailed normal critical value).
    for (i = 0; i < nClasses; i++) {
      for (j = 0; j < infoAttr.length; j++) {
        for (k = 0; k < infoAttr[j]; k++) {
          tmpCondition[0] = new Condition(j, k);
          tmpRule = new Rule(tmpCondition);
          if (Math.abs(computeAdjustedResidual(data, classData, tmpRule, i)) > 1.96) {
            if (!contenedor.contains(tmpRule)) {
              contenedor.add(tmpRule);
              conjR.add(tmpRule);
            }
          }
        }
      }
    }
    // Construct the Baker selection roulette
    // (cumulative probabilities from a linear ranking between NUmax and NUmin).
    prob = new double[popSize];
    for (j = 0; j < popSize; j++) {
      aux = (double) (NUmax - NUmin) * ((double) j / (popSize - 1));
      prob[j] = (double) (1.0 / (popSize)) * (NUmax - aux);
    }
    for (j = 1; j < popSize; j++) prob[j] = prob[j] + prob[j - 1];
    /*Steady-State Genetic Algorithm*/
    ele = 2;
    population = new RuleSet[popSize];
    while (conjR.size() >= 2) {
      t = 0;
      System.out.println("Producing rules of level " + ele);
      // Fresh random population built from the current candidate rules.
      for (i = 0; i < population.length; i++) {
        population[i] = new RuleSet(conjR);
        population[i].computeFitness(data, classData, infoAttr, contenedor, nClasses);
      }
      Arrays.sort(population);
      // Evolve until the generation budget runs out or best == worst (convergence).
      while (t < numGenerations && !population[0].equals(population[popSize - 1])) {
        System.out.println("Generation " + t);
        t++;
        /*Baker's selection*/
        pos1 = Randomize.Rand();
        pos2 = Randomize.Rand();
        for (l = 0; l < popSize && prob[l] < pos1; l++)
          ;
        sel1 = l;
        for (l = 0; l < popSize && prob[l] < pos2; l++)
          ;
        sel2 = l;
        hijo1 = new RuleSet(population[sel1]);
        hijo2 = new RuleSet(population[sel2]);
        if (Randomize.Rand() < pCross) {
          RuleSet.crossover1(hijo1, hijo2);
        } else {
          RuleSet.crossover2(hijo1, hijo2);
        }
        RuleSet.mutation(hijo1, conjR, pMut, data, classData, infoAttr, contenedor, nClasses);
        RuleSet.mutation(hijo2, conjR, pMut, data, classData, infoAttr, contenedor, nClasses);
        hijo1.computeFitness(data, classData, infoAttr, contenedor, nClasses);
        hijo2.computeFitness(data, classData, infoAttr, contenedor, nClasses);
        // Steady state: offspring replace the two worst individuals, then re-rank.
        population[popSize - 2] = new RuleSet(hijo1);
        population[popSize - 1] = new RuleSet(hijo2);
        Arrays.sort(population);
      }
      /*Decode function*/
      ele++;
      conjR.removeAllElements();
      System.out.println(
          "Fitness of the best chromosome in rule level " + ele + ": " + population[0].fitness);
      // Harvest valid, novel rules from the best chromosome as candidates for the next level.
      // NOTE(review): here the residual is tested against class index i (the rule's position in
      // the set), unlike the first-order pass which tried every class — confirm this is intended.
      for (i = 0; i < population[0].getRuleSet().length; i++) {
        if (Math.abs(computeAdjustedResidual(data, classData, population[0].getRule(i), i))
            > 1.96) {
          if (validarRegla(population[0].getRule(i))
              && !contenedor.contains(population[0].getRule(i))) {
            contenedor.add(population[0].getRule(i));
            conjR.add(population[0].getRule(i));
          }
        }
      }
    }
    // Finally we should fill the training and test output files
    doOutput(this.val, this.outputTr, data, classData, infoAttr, contenedor, nClasses);
    doOutput(this.test, this.outputTst, data, classData, infoAttr, contenedor, nClasses);
    /*Print the rule obtained*/
    // Only rules with positive weight of evidence are written, newest first.
    for (i = contenedor.size() - 1; i >= 0; i--) {
      if (reglaPositiva(
          this.train, data, classData, infoAttr, nClasses, contenedor.elementAt(i))) {
        Fichero.AnadirtoFichero(outputRule, contenedor.elementAt(i).toString(train));
        Fichero.AnadirtoFichero(
            outputRule,
            " -> "
                + consecuente(
                    this.train, data, classData, infoAttr, nClases, contenedor.elementAt(i))
                + "\n");
      }
    }
    System.out.println("Algorithm Finished");
  }
}
/**
 * Evaluates a PMML RuleSet model against the current context: collects all rules that fire,
 * then assigns one confidence per predicted category according to the model's first
 * rule-selection method (firstHit, weightedSum, or weightedMax).
 *
 * @param context the evaluation context supplying active field values
 * @return the classification result mapped through TargetUtil
 * @throws InvalidFeatureException if the RuleSet declares no rule-selection method
 * @throws UnsupportedFeatureException if the criterion is not one of the three handled cases
 */
private Map<FieldName, ? extends ClassificationMap<?>> evaluateRuleSet(
    ModelManagerEvaluationContext context) {
  RuleSetModel ruleSetModel = getModel();
  RuleSet ruleSet = ruleSetModel.getRuleSet();
  List<RuleSelectionMethod> ruleSelectionMethods = ruleSet.getRuleSelectionMethods();
  RuleSelectionMethod ruleSelectionMethod;
  // "If more than one method is included, the first method is used as the default method for
  // scoring"
  if (ruleSelectionMethods.size() > 0) {
    ruleSelectionMethod = ruleSelectionMethods.get(0);
  } else {
    throw new InvalidFeatureException(ruleSet);
  }
  // Both the ordering of keys and values is significant
  ListMultimap<String, SimpleRule> firedRules = LinkedListMultimap.create();
  List<Rule> rules = ruleSet.getRules();
  for (Rule rule : rules) {
    collectFiredRules(firedRules, rule, context);
  }
  RuleClassificationMap result = new RuleClassificationMap();
  RuleSelectionMethod.Criterion criterion = ruleSelectionMethod.getCriterion();
  Set<String> keys = firedRules.keySet();
  for (String key : keys) {
    List<SimpleRule> keyRules = firedRules.get(key);
    switch (criterion) {
      case FIRST_HIT:
        {
          SimpleRule winner = keyRules.get(0);
          // The first value of the first key
          if (result.getEntity() == null) {
            result.setEntity(winner);
          }
          result.put(key, winner.getConfidence());
        }
        break;
      case WEIGHTED_SUM:
        {
          // The heaviest fired rule represents the category; weights accumulate.
          SimpleRule winner = null;
          double totalWeight = 0;
          for (SimpleRule keyRule : keyRules) {
            if (winner == null || (winner.getWeight() < keyRule.getWeight())) {
              winner = keyRule;
            }
            totalWeight += keyRule.getWeight();
          }
          // NOTE(review): Multimap.size() counts key-value pairs (all fired rules across all
          // categories), not distinct categories — confirm this divisor matches the intended
          // weightedSum semantics of the PMML spec.
          result.put(winner, key, totalWeight / firedRules.size());
        }
        break;
      case WEIGHTED_MAX:
        {
          // The single heaviest fired rule decides; its confidence is reported.
          SimpleRule winner = null;
          for (SimpleRule keyRule : keyRules) {
            if (winner == null || (winner.getWeight() < keyRule.getWeight())) {
              winner = keyRule;
            }
          }
          result.put(winner, key, winner.getConfidence());
        }
        break;
      default:
        throw new UnsupportedFeatureException(ruleSelectionMethod, criterion);
    }
  }
  return TargetUtil.evaluateClassification(result, context);
}