private void trainModelWithClasses(double param) { int wayListSizeWithoutUnclassified = wayList.size(); int u = 0; System.out.println("trainList size: " + wayListSizeWithoutUnclassified); // set classes for each osm instance int sizeToBeAddedToArray = 0; // this will be used to proper init the features array, adding the multiple vectors size // int lalala = 0; // setProgress(trainProgress-10); for (OSMWay way : wayList) { // setProgress(trainProgress++); OSMClassification classifyInstances = new OSMClassification(); classifyInstances.calculateClasses( way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs); if (way.getClassIDs().isEmpty()) { wayListSizeWithoutUnclassified -= 1; u++; } else { sizeToBeAddedToArray = sizeToBeAddedToArray + way.getClassIDs().size() - 1; } } double C = param; double eps = 0.001; double[] GROUPS_ARRAY2 = new double[wayListSizeWithoutUnclassified + sizeToBeAddedToArray]; // new double[117558];// FeatureNode[][] trainingSetWithUnknown2 = new FeatureNode[wayListSizeWithoutUnclassified + sizeToBeAddedToArray] [numberOfFeatures + 1422]; // working[3812]; int k = 0; // setProgress(trainProgress+5); for (OSMWay way : wayList) { // adding multiple vectors int id; // if(USE_CLASS_FEATURES){ ClassFeatures class_vector = new ClassFeatures(); class_vector.createClassFeatures( way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs); id = 1422; // } // else{ // id = 1; // } // pass id also: 1422 if using classes, 1 if not GeometryFeatures geometryFeatures = new GeometryFeatures(id); geometryFeatures.createGeometryFeatures(way); id = geometryFeatures.getLastID(); // id after geometry, cases: all geometry features with mean-variance boolean intervals: // id = 1526 if (USE_RELATION_FEATURES) { RelationFeatures relationFeatures = new RelationFeatures(id); relationFeatures.createRelationFeatures(way, relationList); id = relationFeatures.getLastID(); } else { id = geometryFeatures.getLastID(); } // id 1531 TextualFeatures textualFeatures; if (USE_TEXTUAL_FEATURES) { textualFeatures = new TextualFeatures(id, namesList, languageDetector); textualFeatures.createTextualFeatures(way); } List<FeatureNode> featureNodeList = way.getFeatureNodeList(); FeatureNode[] featureNodeArray = new FeatureNode[featureNodeList.size()]; if (!way.getClassIDs().isEmpty()) { int i = 0; for (FeatureNode featureNode : featureNodeList) { featureNodeArray[i] = featureNode; i++; } for (int classID : way.getClassIDs()) { // lalala++; trainingSetWithUnknown2[k] = featureNodeArray; GROUPS_ARRAY2[k] = classID; k++; } } } // Linear.enableDebugOutput(); Problem problem = new Problem(); problem.l = wayListSizeWithoutUnclassified + sizeToBeAddedToArray; // wayListSizeWithoutUnclassified;//wayList.size(); problem.n = numberOfFeatures + 1422; // 3797; // number of features //the largest index of all features //3811;//3812 // //1812 with classes problem.x = trainingSetWithUnknown2; // feature nodes problem.y = GROUPS_ARRAY2; // target values // SolverType solver = SolverType.MCSVM_CS; //Cramer and Singer for multiclass classification - // equivalent of SVMlight SolverType solver2 = SolverType.getById(2); // 2 -- L2-regularized L2-loss support vector classification (primal) Parameter parameter = new Parameter(solver2, C, eps); // System.out.println("param set ok"); // System.out.println("number of features: " + vc.getNumOfFeatures()); long start = System.nanoTime(); System.out.println("training..."); PrintStream original = System.out; System.setOut( new PrintStream( new OutputStream() { @Override public void write(int arg0) throws IOException {} })); Model model = Linear.train(problem, parameter); long end = System.nanoTime(); Long elapsedTime = end - start; System.setOut(original); System.out.println( "training process completed in: " + NANOSECONDS.toSeconds(elapsedTime) + " seconds."); // System.out.println("trained"); // decide model path and naming and/or way of deleting/creating 1 or more models. // File inFile = new File(inputFilePath).getParentFile(); File modelFile = new File(modelDirectory.getAbsolutePath() + "/model_with_classes"); File customModelFile; if (topKIsSelected) { customModelFile = new File( modelDirectory.getAbsolutePath() + "/" + inputFileName + "_model" + "_c" + param + "_topK" + topK + ".1"); } else { customModelFile = new File( modelDirectory.getAbsolutePath() + "/" + inputFileName + "_model_c" + param + "_maxF" + frequency + ".1"); } if (customModelFile.exists()) { customModelFile.delete(); } if (modelFile.exists()) { modelFile.delete(); } try { // System.out.println("file created"); model.save(modelFile); model.save(customModelFile); System.out.println("model with classes saved at: " + modelFile); System.out.println("custom model with classes saved at: " + modelFile); } catch (IOException ex) { Logger.getLogger(TrainWorker.class.getName()).log(Level.SEVERE, null, ex); } }
public void crossValidateFold(int a, int b, int c, int d, boolean skip, double param) { System.out.println("Starting cross validation"); int testSize = wayList.size() / 5; List<OSMWay> trainList = new ArrayList<>(); for (int g = a * testSize; g < b * testSize; g++) { // 0~~1~~2~~3~~4~~5 if (skip) { if (g == (c) * testSize) { g = (c + 1) * testSize; } } trainList.add(wayList.get(g)); } int wayListSizeWithoutUnclassified = trainList.size(); int u = 0; System.out.println("trainList size: " + wayListSizeWithoutUnclassified); // set classes for each osm instance int sizeToBeAddedToArray = 0; // this will be used to proper init the features array, adding the multiple vectors size int lalala = 0; for (OSMWay way : trainList) { OSMClassification classifyInstances = new OSMClassification(); classifyInstances.calculateClasses( way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs); if (way.getClassIDs().isEmpty()) { wayListSizeWithoutUnclassified -= 1; u++; } else { sizeToBeAddedToArray = sizeToBeAddedToArray + way.getClassIDs().size() - 1; } } double C = param; double eps = 0.001; double[] GROUPS_ARRAY2 = new double[wayListSizeWithoutUnclassified + sizeToBeAddedToArray]; // new double[117558];// FeatureNode[][] trainingSetWithUnknown2 = new FeatureNode[wayListSizeWithoutUnclassified + sizeToBeAddedToArray] [numberOfFeatures]; // working[3812]; int k = 0; for (OSMWay way : trainList) { // adding multiple vectors int id; if (USE_CLASS_FEATURES) { ClassFeatures class_vector = new ClassFeatures(); class_vector.createClassFeatures( way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs); id = 1422; } else { id = 1; } // pass id also: 1422 if using classes, 1 if not GeometryFeatures geometryFeatures = new GeometryFeatures(id); geometryFeatures.createGeometryFeatures(way); id = geometryFeatures.getLastID(); // id after geometry, cases: all geometry features with mean-variance boolean intervals: // id = 1526 if (USE_RELATION_FEATURES) { RelationFeatures relationFeatures = new RelationFeatures(id); relationFeatures.createRelationFeatures(way, relationList); id = relationFeatures.getLastID(); } else { id = geometryFeatures.getLastID(); } // id 1531 TextualFeatures textualFeatures; if (USE_TEXTUAL_FEATURES) { textualFeatures = new TextualFeatures(id, namesList, languageDetector); textualFeatures.createTextualFeatures(way); } List<FeatureNode> featureNodeList = way.getFeatureNodeList(); FeatureNode[] featureNodeArray = new FeatureNode[featureNodeList.size()]; if (!way.getClassIDs().isEmpty()) { int i = 0; for (FeatureNode featureNode : featureNodeList) { featureNodeArray[i] = featureNode; i++; } for (int classID : way.getClassIDs()) { lalala++; trainingSetWithUnknown2[k] = featureNodeArray; GROUPS_ARRAY2[k] = classID; k++; } } } // Linear.enableDebugOutput(); Problem problem = new Problem(); problem.l = wayListSizeWithoutUnclassified + sizeToBeAddedToArray; // wayListSizeWithoutUnclassified;//wayList.size(); problem.n = numberOfFeatures; // (geometry 105 + textual //3797; // number of features //the // largest index of all features //3811;//3812 //1812 with classes problem.x = trainingSetWithUnknown2; // feature nodes problem.y = GROUPS_ARRAY2; // target values // SolverType solver = SolverType.MCSVM_CS; //Cramer and Singer for multiclass classification - // equivalent of SVMlight SolverType solver2 = SolverType.getById(2); // 2 -- L2-regularized L2-loss support vector classification (primal) Parameter parameter = new Parameter(solver2, C, eps); // System.out.println("param set ok"); // System.out.println("number of features: " + vc.getNumOfFeatures()); long start = System.nanoTime(); System.out.println("training..."); PrintStream original = System.out; System.setOut( new PrintStream( new OutputStream() { @Override public void write(int arg0) throws IOException {} })); Model model = Linear.train(problem, parameter); long end = System.nanoTime(); Long elapsedTime = end - start; System.setOut(original); System.out.println( "training process completed in: " + NANOSECONDS.toSeconds(elapsedTime) + " seconds."); // System.out.println("trained"); // decide model path and naming and/or way of deleting/creating 1 or more models. // File inFile = new File(inputFilePath).getParentFile(); File modelFile; if (USE_CLASS_FEATURES) { modelFile = new File(modelDirectory.getAbsolutePath() + "/model_with_classes_c=" + param); } else { modelFile = new File(modelDirectory.getAbsolutePath() + "/model_geometries_textual_c=" + param); } if (modelFile.exists()) { modelFile.delete(); } try { // System.out.println("file created"); model.save(modelFile); System.out.println("model saved at: " + modelFile); } catch (IOException ex) { Logger.getLogger(TrainWorker.class.getName()).log(Level.SEVERE, null, ex); } // end of evaluation training // test set List<OSMWay> testList = new ArrayList<>(); for (int g = c * testSize; g < d * testSize; g++) { testList.add(wayList.get(g)); // liblinear test } System.out.println("testList size: " + testList.size()); int succededInstances = 0; int succededInstances5 = 0; int succededInstances10 = 0; try { model = Model.load(modelFile); } catch (IOException ex) { Logger.getLogger(TrainWorker.class.getName()).log(Level.SEVERE, null, ex); } int modelLabelSize = model.getLabels().length; int[] labels = model.getLabels(); Map<Integer, Integer> mapLabelsToIDs = new HashMap<>(); for (int h = 0; h < model.getLabels().length; h++) { mapLabelsToIDs.put(labels[h], h); // System.out.println(h + " <-> " + labels[h]); } int wayListSizeWithoutUnclassified2 = testList.size(); for (OSMWay way : testList) { OSMClassification classifyInstances = new OSMClassification(); classifyInstances.calculateClasses( way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs); if (way.getClassIDs().isEmpty()) { // System.out.println("found unclassified" + way.getClassIDs() + "class: " // +way.getClassID()); wayListSizeWithoutUnclassified2 -= 1; // u++; } } FeatureNode[] testInstance2; for (OSMWay way : testList) { int id; if (USE_CLASS_FEATURES) { ClassFeatures class_vector = new ClassFeatures(); class_vector.createClassFeatures( way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs); id = 1422; } else { id = 1; } // pass id also: 1422 if using classes, 1 if not GeometryFeatures geometryFeatures = new GeometryFeatures(id); geometryFeatures.createGeometryFeatures(way); id = geometryFeatures.getLastID(); // id after geometry, cases: all geometry features with mean-variance boolean intervals: // id = 1526 // System.out.println("id 1526 -> " + geometryFeatures.getLastID()); if (USE_RELATION_FEATURES) { RelationFeatures relationFeatures = new RelationFeatures(id); relationFeatures.createRelationFeatures(way, relationList); id = relationFeatures.getLastID(); } else { id = geometryFeatures.getLastID(); // System.out.println("geom feat " + id); } // id 1531 // System.out.println("id 1532 -> " + relationFeatures.getLastID()); if (USE_TEXTUAL_FEATURES) { TextualFeatures textualFeatures = new TextualFeatures(id, namesList, languageDetector); textualFeatures.createTextualFeatures(way); // System.out.println("last textual id: " + textualFeatures.getLastID()); // System.out.println("full: " + way.getFeatureNodeList()); } else { } List<FeatureNode> featureNodeList = way.getFeatureNodeList(); FeatureNode[] featureNodeArray = new FeatureNode[featureNodeList.size()]; int i = 0; for (FeatureNode featureNode : featureNodeList) { featureNodeArray[i] = featureNode; i++; } testInstance2 = featureNodeArray; // double prediction = Linear.predict(model, testInstance2); // System.out.println("test prediction: " + prediction); double[] scores = new double[modelLabelSize]; Linear.predictValues(model, testInstance2, scores); // find index of max values in scores array: predicted classes are the elements of these // indexes from array model.getlabels // iter scores and find 10 max values with their indexes first. then ask those indexes from // model.getlabels Map<Double, Integer> scoresValues = new HashMap<>(); for (int h = 0; h < scores.length; h++) { scoresValues.put(scores[h], h); // System.out.println(h + " <-> " + scores[h]); } Arrays.sort(scores); // System.out.println("max value: " + scores[scores.length-1] + " second max: " + // scores[scores.length-2]); // System.out.println("ask this index from labels: " + // scoresValues.get(scores[scores.length-1])); // System.out.println("got from labels: " + // labels[scoresValues.get(scores[scores.length-1])]); // System.out.println("next prediction: " + // labels[scoresValues.get(scores[scores.length-2])]); // System.out.println("way labels: " + way.getClassIDs()); // System.out.println("test prediction: " + prediction); if (way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 1])])) { succededInstances++; } if (way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 1])]) || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 2])]) || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 3])]) || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 4])]) || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 5])])) { succededInstances5++; } if (way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 1])]) || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 2])]) || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 3])]) || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 4])]) || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 5])]) || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 6])]) || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 7])]) || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 8])]) || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 9])]) || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 10])])) { succededInstances10++; } // System.out.println("labels: " + Arrays.toString(model.getLabels())); // System.out.println("label[0]: " + model.getLabels()[0]); } System.out.println( "Succeeded " + succededInstances + " of " + testList.size() + " total (1 class prediction)"); double precision1 = succededInstances / (double) wayListSizeWithoutUnclassified2; score1 = precision1; System.out.println(precision1); System.out.println( "Succeeded " + succededInstances5 + " of " + testList.size() + " total (5 class prediction)"); double precision5 = succededInstances5 / (double) wayListSizeWithoutUnclassified2; score5 = precision5; System.out.println(precision5); System.out.println( "Succeeded " + succededInstances10 + " of " + testList.size() + " total (10 class prediction)"); double precision10 = succededInstances10 / (double) wayListSizeWithoutUnclassified2; score10 = precision10; System.out.println(precision10); }