/** * selects a given number of features by info gain eval * * @param number the number of features to select * @throws Exception */ public void selectFeatures(int number) throws Exception { weka.filters.supervised.attribute.AttributeSelection filter = new weka.filters.supervised.attribute.AttributeSelection(); InfoGainAttributeEval eval = new InfoGainAttributeEval(); Ranker search = new Ranker(); search.setNumToSelect(number); filter.setEvaluator(eval); filter.setSearch(search); filter.setInputFormat(_train); Instances newData = Filter.useFilter(_train, filter); Instances newData_test = Filter.useFilter(_test, filter); _train = newData; _test = newData_test; }
private double[] classify(String test) { String[] lab = { "I.2", "I.3", "I.5", "I.6", "I.2.1", "I.2.6", "I.2.8", "I.3.5", "I.3.6", "I.3.7", "I.5.1", "I.5.2", "I.5.4", "I.6.3", "I.6.5", "I.6.8", }; int NSel = 1000; // Number of selection Filter[] filters = new Filter[2]; double[] x = new double[16]; double[] prd = new double[16]; double clsLabel; Ranker rank = new Ranker(); Evaluation eval = null; StringToWordVector stwv = new StringToWordVector(); weka.filters.supervised.attribute.AttributeSelection featSel = new weka.filters.supervised.attribute.AttributeSelection(); WordTokenizer wtok = new WordTokenizer(); String delim = " \r\n\t.,;:'\"()?!$*-&[]+/|\\"; InfoGainAttributeEval ig = new InfoGainAttributeEval(); String[] stwvOpts; wtok.setDelimiters(delim); Instances[] dataRaw = new Instances[10000]; DataSource[] source = new DataSource[16]; String str; Instances testset = null; DataSource testsrc = null; try { testsrc = new DataSource(test); testset = testsrc.getDataSet(); } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } for (int j = 0; j < 16; j++) // 16 element 0-15 { try { str = lab[j]; source[j] = new DataSource( "D:/Users/nma1g11/workspace2/WebScraperFlatNew/dataPernode/new/" + str + ".arff"); dataRaw[j] = source[j].getDataSet(); } catch (Exception e) { e.printStackTrace(); } System.out.println(lab[j]); if (dataRaw[j].classIndex() == -1) dataRaw[j].setClassIndex(dataRaw[j].numAttributes() - 1); } if (testset.classIndex() == -1) testset.setClassIndex(testset.numAttributes() - 1); try { stwvOpts = weka.core.Utils.splitOptions( "-R first-last -W 1000000 -prune-rate -1.0 -C -T -I -N 1 -L -S -stemmer weka.core.stemmers.LovinsStemmer -M 2 "); stwv.setOptions(stwvOpts); stwv.setTokenizer(wtok); rank.setOptions(weka.core.Utils.splitOptions("-T -1.7976931348623157E308 -N 100")); rank.setNumToSelect(NSel); featSel.setEvaluator(ig); featSel.setSearch(rank); } catch (Exception e) { e.printStackTrace(); } filters[0] = stwv; filters[1] = featSel; System.out.println("Loading is Done!"); MultiFilter mfilter = new MultiFilter(); mfilter.setFilters(filters); FilteredClassifier classify = new FilteredClassifier(); classify.setClassifier( new NaiveBayesMultinomial()); ///////// Algorithm of The Classification ///////// classify.setFilter(mfilter); String ss2 = ""; try { Classifier[] clsArr = new Classifier[16]; clsArr = Classifier.makeCopies(classify, 16); String strcls = ""; List<String> clsList = new ArrayList<String>(); String s = null; String newcls = null; String lb = ""; String prev = ""; boolean flag = false; String Ocls = null; int q = 0; for (int i = 0; i < 16; i++) { for (int k = 0; k < testset.numInstances(); k++) { flag = false; s = testset.instance(k).stringValue(1); clsList.add(s); if (lab[i].equals(s)) { flag = true; newcls = s; } } clsArr[i].buildClassifier(dataRaw[i]); eval = new Evaluation(dataRaw[i]); for (int j = 0; j < testset.numInstances(); j++) { Ocls = testset.instance(j).stringValue(1); if (flag && !s.equals(null)) testset.instance(j).setClassValue(lab[i]); // ----------------------------------------- strcls = testset.instance(j).stringValue(1); if (i < 4) { if (strcls.substring(0, 3).equals(lab[i])) testset.instance(j).setClassValue(lab[i]); } else if (lab[i].substring(0, 3).equals(strcls)) testset.instance(j).setClassValue(lab[i]); // ------------------------------------------------ System.out.println( dataRaw[i].classAttribute().value(i) + " --- > Correct%:" + eval.pctCorrect() + " F-measure:" + eval.fMeasure(i)); if (!prev.equals(testset.instance(j).stringValue(0)) || !lab[i].equals(lb)) { clsLabel = clsArr[i].classifyInstance(testset.instance(j)); x = clsArr[i].distributionForInstance(testset.instance(j)); prd[i] = x[i]; System.out.println(" --- > prob: " + clsLabel); System.out.println(" --- > x :" + x[i]); System.out.println(clsLabel + " --> " + testset.classAttribute().value((int) clsLabel)); } testset.instance(j).setClassValue(Ocls); prev = testset.instance(j).stringValue(0); lb = lab[i]; } System.out.println("Done with " + lab[i].replace("99", "") + " !!!!!!!!!!!"); } System.out.println(eval.correct()); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } return prd; }