/** * Build Decorate classifier * * @param data the training data to be used for generating the classifier * @exception Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { if (m_Classifier == null) { throw new Exception("A base classifier has not been specified!"); } if (data.checkForStringAttributes()) { throw new UnsupportedAttributeTypeException("Cannot handle string attributes!"); } if (data.classAttribute().isNumeric()) { throw new UnsupportedClassTypeException("Decorate can't handle a numeric class!"); } if (m_NumIterations < m_DesiredSize) throw new Exception("Max number of iterations must be >= desired ensemble size!"); // initialize random number generator if (m_Seed == -1) m_Random = new Random(); else m_Random = new Random(m_Seed); int i = 1; // current committee size int numTrials = 1; // number of Decorate iterations Instances divData = new Instances(data); // local copy of data - diversity data divData.deleteWithMissingClass(); Instances artData = null; // artificial data // compute number of artficial instances to add at each iteration int artSize = (int) (Math.abs(m_ArtSize) * divData.numInstances()); if (artSize == 0) artSize = 1; // atleast add one random example computeStats(data); // Compute training data stats for creating artificial examples // initialize new committee m_Committee = new Vector(); Classifier newClassifier = m_Classifier; newClassifier.buildClassifier(divData); m_Committee.add(newClassifier); double eComm = computeError(divData); // compute ensemble error if (m_Debug) System.out.println( "Initialize:\tClassifier " + i + " added to ensemble. Ensemble error = " + eComm); // repeat till desired committee size is reached OR the max number of iterations is exceeded while (i < m_DesiredSize && numTrials < m_NumIterations) { // Generate artificial training examples artData = generateArtificialData(artSize, data); // Label artificial examples labelData(artData); addInstances(divData, artData); // Add new artificial data // Build new classifier Classifier tmp[] = Classifier.makeCopies(m_Classifier, 1); newClassifier = tmp[0]; newClassifier.buildClassifier(divData); // Remove all the artificial data removeInstances(divData, artSize); // Test if the new classifier should be added to the ensemble m_Committee.add(newClassifier); // add new classifier to current committee double currError = computeError(divData); if (currError <= eComm) { // adding the new member did not increase the error i++; eComm = currError; if (m_Debug) System.out.println( "Iteration: " + (1 + numTrials) + "\tClassifier " + i + " added to ensemble. Ensemble error = " + eComm); } else { // reject the current classifier because it increased the ensemble error m_Committee.removeElementAt(m_Committee.size() - 1); // pop the last member } numTrials++; } }
private double[] classify(String test) { String[] lab = { "I.2", "I.3", "I.5", "I.6", "I.2.1", "I.2.6", "I.2.8", "I.3.5", "I.3.6", "I.3.7", "I.5.1", "I.5.2", "I.5.4", "I.6.3", "I.6.5", "I.6.8", }; int NSel = 1000; // Number of selection Filter[] filters = new Filter[2]; double[] x = new double[16]; double[] prd = new double[16]; double clsLabel; Ranker rank = new Ranker(); Evaluation eval = null; StringToWordVector stwv = new StringToWordVector(); weka.filters.supervised.attribute.AttributeSelection featSel = new weka.filters.supervised.attribute.AttributeSelection(); WordTokenizer wtok = new WordTokenizer(); String delim = " \r\n\t.,;:'\"()?!$*-&[]+/|\\"; InfoGainAttributeEval ig = new InfoGainAttributeEval(); String[] stwvOpts; wtok.setDelimiters(delim); Instances[] dataRaw = new Instances[10000]; DataSource[] source = new DataSource[16]; String str; Instances testset = null; DataSource testsrc = null; try { testsrc = new DataSource(test); testset = testsrc.getDataSet(); } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } for (int j = 0; j < 16; j++) // 16 element 0-15 { try { str = lab[j]; source[j] = new DataSource( "D:/Users/nma1g11/workspace2/WebScraperFlatNew/dataPernode/new/" + str + ".arff"); dataRaw[j] = source[j].getDataSet(); } catch (Exception e) { e.printStackTrace(); } System.out.println(lab[j]); if (dataRaw[j].classIndex() == -1) dataRaw[j].setClassIndex(dataRaw[j].numAttributes() - 1); } if (testset.classIndex() == -1) testset.setClassIndex(testset.numAttributes() - 1); try { stwvOpts = weka.core.Utils.splitOptions( "-R first-last -W 1000000 -prune-rate -1.0 -C -T -I -N 1 -L -S -stemmer weka.core.stemmers.LovinsStemmer -M 2 "); stwv.setOptions(stwvOpts); stwv.setTokenizer(wtok); rank.setOptions(weka.core.Utils.splitOptions("-T -1.7976931348623157E308 -N 100")); rank.setNumToSelect(NSel); featSel.setEvaluator(ig); featSel.setSearch(rank); } catch (Exception e) { e.printStackTrace(); } filters[0] = stwv; filters[1] = featSel; System.out.println("Loading is Done!"); MultiFilter mfilter = new MultiFilter(); mfilter.setFilters(filters); FilteredClassifier classify = new FilteredClassifier(); classify.setClassifier( new NaiveBayesMultinomial()); ///////// Algorithm of The Classification ///////// classify.setFilter(mfilter); String ss2 = ""; try { Classifier[] clsArr = new Classifier[16]; clsArr = Classifier.makeCopies(classify, 16); String strcls = ""; List<String> clsList = new ArrayList<String>(); String s = null; String newcls = null; String lb = ""; String prev = ""; boolean flag = false; String Ocls = null; int q = 0; for (int i = 0; i < 16; i++) { for (int k = 0; k < testset.numInstances(); k++) { flag = false; s = testset.instance(k).stringValue(1); clsList.add(s); if (lab[i].equals(s)) { flag = true; newcls = s; } } clsArr[i].buildClassifier(dataRaw[i]); eval = new Evaluation(dataRaw[i]); for (int j = 0; j < testset.numInstances(); j++) { Ocls = testset.instance(j).stringValue(1); if (flag && !s.equals(null)) testset.instance(j).setClassValue(lab[i]); // ----------------------------------------- strcls = testset.instance(j).stringValue(1); if (i < 4) { if (strcls.substring(0, 3).equals(lab[i])) testset.instance(j).setClassValue(lab[i]); } else if (lab[i].substring(0, 3).equals(strcls)) testset.instance(j).setClassValue(lab[i]); // ------------------------------------------------ System.out.println( dataRaw[i].classAttribute().value(i) + " --- > Correct%:" + eval.pctCorrect() + " F-measure:" + eval.fMeasure(i)); if (!prev.equals(testset.instance(j).stringValue(0)) || !lab[i].equals(lb)) { clsLabel = clsArr[i].classifyInstance(testset.instance(j)); x = clsArr[i].distributionForInstance(testset.instance(j)); prd[i] = x[i]; System.out.println(" --- > prob: " + clsLabel); System.out.println(" --- > x :" + x[i]); System.out.println(clsLabel + " --> " + testset.classAttribute().value((int) clsLabel)); } testset.instance(j).setClassValue(Ocls); prev = testset.instance(j).stringValue(0); lb = lab[i]; } System.out.println("Done with " + lab[i].replace("99", "") + " !!!!!!!!!!!"); } System.out.println(eval.correct()); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } return prd; }