예제 #1
0
  /**
   * Trains a liblinear model on every classified way in {@code wayList} and saves it to two files
   * inside {@code modelDirectory}: the fixed name {@code model_with_classes} and a custom name
   * built from the input file name, {@code param} and either {@code topK} or {@code frequency}.
   *
   * <p>A way that maps to several class IDs contributes one training row per class ID, all sharing
   * the same feature vector. Ways without any class ID are left out of the training set.
   *
   * @param param the cost value handed to the liblinear {@code Parameter} (C)
   */
  private void trainModelWithClasses(double param) {
    int wayListSizeWithoutUnclassified = wayList.size();
    System.out.println("trainList size: " + wayListSizeWithoutUnclassified);

    // First pass: compute the classes of each way and count how many training rows are needed.
    // sizeToBeAddedToArray holds the extra rows required by ways with more than one class.
    int sizeToBeAddedToArray = 0;
    for (OSMWay way : wayList) {
      OSMClassification classifyInstances = new OSMClassification();
      classifyInstances.calculateClasses(
          way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs);

      if (way.getClassIDs().isEmpty()) {
        wayListSizeWithoutUnclassified -= 1;
      } else {
        sizeToBeAddedToArray += way.getClassIDs().size() - 1;
      }
    }

    int trainingRows = wayListSizeWithoutUnclassified + sizeToBeAddedToArray;
    double[] targetValues = new double[trainingRows];
    // Only the outer array is allocated: every row is assigned a complete feature array below,
    // so pre-allocating a second dimension would only waste memory.
    FeatureNode[][] trainingSet = new FeatureNode[trainingRows][];
    int row = 0;

    // Second pass: build the feature vector of each way and emit one row per class ID.
    for (OSMWay way : wayList) {
      ClassFeatures classVector = new ClassFeatures();
      classVector.createClassFeatures(
          way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs);

      // 1422 is the first feature id handed to the geometry features when class features are in
      // use (per the original note: "1422 if using classes, 1 if not").
      GeometryFeatures geometryFeatures = new GeometryFeatures(1422);
      geometryFeatures.createGeometryFeatures(way);
      int id = geometryFeatures.getLastID();

      if (USE_RELATION_FEATURES) {
        RelationFeatures relationFeatures = new RelationFeatures(id);
        relationFeatures.createRelationFeatures(way, relationList);
        id = relationFeatures.getLastID();
      }

      if (USE_TEXTUAL_FEATURES) {
        TextualFeatures textualFeatures = new TextualFeatures(id, namesList, languageDetector);
        textualFeatures.createTextualFeatures(way);
      }

      if (!way.getClassIDs().isEmpty()) {
        List<FeatureNode> featureNodeList = way.getFeatureNodeList();
        FeatureNode[] featureNodeArray =
            featureNodeList.toArray(new FeatureNode[featureNodeList.size()]);
        for (int classID : way.getClassIDs()) {
          trainingSet[row] = featureNodeArray;
          targetValues[row] = classID;
          row++;
        }
      }
    }

    Problem problem = new Problem();
    problem.l = trainingRows; // number of training rows
    problem.n = numberOfFeatures + 1422; // feature dimension given to liblinear
    problem.x = trainingSet; // feature nodes
    problem.y = targetValues; // target values

    // 2 -- L2-regularized L2-loss support vector classification (primal)
    SolverType solver = SolverType.getById(2);
    double eps = 0.001;
    Parameter parameter = new Parameter(solver, param, eps);

    long start = System.nanoTime();
    System.out.println("training...");

    // Silence System.out while liblinear trains. The original stream is restored in a finally
    // block so that a failure inside train() cannot leave the console muted.
    PrintStream original = System.out;
    System.setOut(
        new PrintStream(
            new OutputStream() {
              @Override
              public void write(int arg0) throws IOException {}
            }));
    Model model;
    long elapsedTime;
    try {
      model = Linear.train(problem, parameter);
      elapsedTime = System.nanoTime() - start;
    } finally {
      System.setOut(original);
    }
    System.out.println(
        "training process completed in: " + NANOSECONDS.toSeconds(elapsedTime) + " seconds.");

    File modelFile = new File(modelDirectory.getAbsolutePath() + "/model_with_classes");

    File customModelFile;
    if (topKIsSelected) {
      customModelFile =
          new File(
              modelDirectory.getAbsolutePath()
                  + "/"
                  + inputFileName
                  + "_model"
                  + "_c"
                  + param
                  + "_topK"
                  + topK
                  + ".1");
    } else {
      customModelFile =
          new File(
              modelDirectory.getAbsolutePath()
                  + "/"
                  + inputFileName
                  + "_model_c"
                  + param
                  + "_maxF"
                  + frequency
                  + ".1");
    }

    // Remove stale models from a previous run before writing the new ones.
    if (customModelFile.exists()) {
      customModelFile.delete();
    }
    if (modelFile.exists()) {
      modelFile.delete();
    }

    try {
      model.save(modelFile);
      model.save(customModelFile);
      System.out.println("model with classes saved at: " + modelFile);
      // Report the custom file here; the original printed modelFile a second time.
      System.out.println("custom model with classes saved at: " + customModelFile);
    } catch (IOException ex) {
      Logger.getLogger(TrainWorker.class.getName()).log(Level.SEVERE, null, ex);
    }
  }
예제 #2
0
  /**
   * Runs one cross-validation fold over {@code wayList}, which is treated as five equal parts of
   * {@code wayList.size() / 5} ways each.
   *
   * <p>A liblinear model is trained on the ways of parts {@code [a, b)}, saved to {@code
   * modelDirectory}, loaded back and evaluated on the ways of parts {@code [c, d)}. A test way
   * counts as a hit at rank k when any of its class IDs is among the k highest-scoring labels.
   * The top-1, top-5 and top-10 hit ratios are printed and stored in {@code score1}, {@code
   * score5} and {@code score10}. The ratios are taken over the test ways that have at least one
   * class ID.
   *
   * @param a index of the first part used for training
   * @param b index one past the last part used for training
   * @param c index of the first part used for testing; when {@code skip} is set, this part is
   *     also left out of the training range
   * @param d index one past the last part used for testing
   * @param skip whether part {@code c} lies inside {@code [a, b)} and must be skipped while
   *     collecting the training ways
   * @param param the cost value handed to the liblinear {@code Parameter} (C)
   */
  public void crossValidateFold(int a, int b, int c, int d, boolean skip, double param) {
    System.out.println("Starting cross validation");
    int testSize = wayList.size() / 5;

    List<OSMWay> trainList = new ArrayList<>();
    int trainEnd = b * testSize;
    for (int g = a * testSize; g < trainEnd; g++) {
      if (skip && g == c * testSize) {
        // Jump over the held-out part. Re-check the bound afterwards: when that part is the last
        // one of the range, the jump lands at or past trainEnd and nothing more may be added.
        g = (c + 1) * testSize;
        if (g >= trainEnd) {
          break;
        }
      }
      trainList.add(wayList.get(g));
    }

    int wayListSizeWithoutUnclassified = trainList.size();
    System.out.println("trainList size: " + wayListSizeWithoutUnclassified);

    // First pass: compute the classes of each training way and count the rows needed.
    // sizeToBeAddedToArray holds the extra rows required by ways with more than one class.
    int sizeToBeAddedToArray = 0;
    for (OSMWay way : trainList) {
      OSMClassification classifyInstances = new OSMClassification();
      classifyInstances.calculateClasses(
          way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs);

      if (way.getClassIDs().isEmpty()) {
        wayListSizeWithoutUnclassified -= 1;
      } else {
        sizeToBeAddedToArray += way.getClassIDs().size() - 1;
      }
    }

    int trainingRows = wayListSizeWithoutUnclassified + sizeToBeAddedToArray;
    double[] targetValues = new double[trainingRows];
    // Only the outer array is allocated: every row is assigned a complete feature array below.
    FeatureNode[][] trainingSet = new FeatureNode[trainingRows][];
    int row = 0;

    // Second pass: build the feature vector of each way and emit one row per class ID.
    for (OSMWay way : trainList) {
      buildFeatures(way);

      if (!way.getClassIDs().isEmpty()) {
        List<FeatureNode> featureNodeList = way.getFeatureNodeList();
        FeatureNode[] featureNodeArray =
            featureNodeList.toArray(new FeatureNode[featureNodeList.size()]);
        for (int classID : way.getClassIDs()) {
          trainingSet[row] = featureNodeArray;
          targetValues[row] = classID;
          row++;
        }
      }
    }

    Problem problem = new Problem();
    problem.l = trainingRows; // number of training rows
    problem.n = numberOfFeatures; // feature dimension given to liblinear
    problem.x = trainingSet; // feature nodes
    problem.y = targetValues; // target values

    // 2 -- L2-regularized L2-loss support vector classification (primal)
    SolverType solver = SolverType.getById(2);
    double eps = 0.001;
    Parameter parameter = new Parameter(solver, param, eps);

    long start = System.nanoTime();
    System.out.println("training...");

    // Silence System.out while liblinear trains. The original stream is restored in a finally
    // block so that a failure inside train() cannot leave the console muted.
    PrintStream original = System.out;
    System.setOut(
        new PrintStream(
            new OutputStream() {
              @Override
              public void write(int arg0) throws IOException {}
            }));
    Model model;
    long elapsedTime;
    try {
      model = Linear.train(problem, parameter);
      elapsedTime = System.nanoTime() - start;
    } finally {
      System.setOut(original);
    }
    System.out.println(
        "training process completed in: " + NANOSECONDS.toSeconds(elapsedTime) + " seconds.");

    File modelFile;
    if (USE_CLASS_FEATURES) {
      modelFile = new File(modelDirectory.getAbsolutePath() + "/model_with_classes_c=" + param);
    } else {
      modelFile =
          new File(modelDirectory.getAbsolutePath() + "/model_geometries_textual_c=" + param);
    }

    if (modelFile.exists()) {
      modelFile.delete();
    }
    try {
      model.save(modelFile);
      System.out.println("model saved at: " + modelFile);
    } catch (IOException ex) {
      Logger.getLogger(TrainWorker.class.getName()).log(Level.SEVERE, null, ex);
    }

    // end of evaluation training

    // test set
    List<OSMWay> testList = new ArrayList<>();
    for (int g = c * testSize; g < d * testSize; g++) {
      testList.add(wayList.get(g));
    }
    System.out.println("testList size: " + testList.size());

    // Evaluate with the model as it was written to disk. If loading fails, the error is logged
    // and the in-memory model from training is used instead.
    try {
      model = Model.load(modelFile);
    } catch (IOException ex) {
      Logger.getLogger(TrainWorker.class.getName()).log(Level.SEVERE, null, ex);
    }
    int[] labels = model.getLabels();

    // Compute the classes of the test ways and count those that have at least one class.
    int wayListSizeWithoutUnclassified2 = testList.size();
    for (OSMWay way : testList) {
      OSMClassification classifyInstances = new OSMClassification();
      classifyInstances.calculateClasses(
          way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs);
      if (way.getClassIDs().isEmpty()) {
        wayListSizeWithoutUnclassified2 -= 1;
      }
    }

    int succededInstances = 0;
    int succededInstances5 = 0;
    int succededInstances10 = 0;
    for (OSMWay way : testList) {
      buildFeatures(way);

      List<FeatureNode> featureNodeList = way.getFeatureNodeList();
      FeatureNode[] testInstance =
          featureNodeList.toArray(new FeatureNode[featureNodeList.size()]);

      double[] scores = new double[labels.length];
      Linear.predictValues(model, testInstance, scores);

      // Rank label positions by score. Ranking positions, rather than looking scores up in a
      // score-keyed map, keeps tied scores apart and copes with models that have fewer than 10
      // labels.
      int[] ranked = topScoreIndexes(scores, 10);
      int firstHitRank = -1;
      for (int rank = 0; rank < ranked.length; rank++) {
        if (way.getClassIDs().contains(labels[ranked[rank]])) {
          firstHitRank = rank;
          break;
        }
      }

      if (firstHitRank == 0) {
        succededInstances++;
      }
      if (firstHitRank >= 0 && firstHitRank < 5) {
        succededInstances5++;
      }
      if (firstHitRank >= 0) { // ranked never holds more than 10 entries
        succededInstances10++;
      }
    }

    System.out.println(
        "Succeeded "
            + succededInstances
            + " of "
            + testList.size()
            + " total (1 class prediction)");
    double precision1 = succededInstances / (double) wayListSizeWithoutUnclassified2;
    score1 = precision1;
    System.out.println(precision1);

    System.out.println(
        "Succeeded "
            + succededInstances5
            + " of "
            + testList.size()
            + " total (5 class prediction)");
    double precision5 = succededInstances5 / (double) wayListSizeWithoutUnclassified2;
    score5 = precision5;
    System.out.println(precision5);

    System.out.println(
        "Succeeded "
            + succededInstances10
            + " of "
            + testList.size()
            + " total (10 class prediction)");
    double precision10 = succededInstances10 / (double) wayListSizeWithoutUnclassified2;
    score10 = precision10;
    System.out.println(precision10);
  }

  /**
   * Creates the class (optional), geometry, relation (optional) and textual (optional) features
   * of {@code way}, chaining the last feature id of each stage into the next one.
   */
  private void buildFeatures(OSMWay way) {
    int id;
    if (USE_CLASS_FEATURES) {
      ClassFeatures classVector = new ClassFeatures();
      classVector.createClassFeatures(
          way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs);
      id = 1422; // first id for the geometry features when class features are in use
    } else {
      id = 1;
    }

    GeometryFeatures geometryFeatures = new GeometryFeatures(id);
    geometryFeatures.createGeometryFeatures(way);
    id = geometryFeatures.getLastID();

    if (USE_RELATION_FEATURES) {
      RelationFeatures relationFeatures = new RelationFeatures(id);
      relationFeatures.createRelationFeatures(way, relationList);
      id = relationFeatures.getLastID();
    }

    if (USE_TEXTUAL_FEATURES) {
      TextualFeatures textualFeatures = new TextualFeatures(id, namesList, languageDetector);
      textualFeatures.createTextualFeatures(way);
    }
  }

  /**
   * Returns the positions of the {@code limit} largest values of {@code scores}, best first. If
   * {@code scores} has fewer than {@code limit} entries, all positions are returned. Among equal
   * scores the higher position ranks first.
   */
  private int[] topScoreIndexes(double[] scores, int limit) {
    int count = Math.min(limit, scores.length);
    int[] top = new int[count];
    boolean[] taken = new boolean[scores.length];
    for (int rank = 0; rank < count; rank++) {
      int best = -1;
      for (int h = 0; h < scores.length; h++) {
        if (taken[h]) {
          continue;
        }
        // ">=" lets a later position win a tie, matching the previous top-1 choice.
        if (best < 0 || Double.compare(scores[h], scores[best]) >= 0) {
          best = h;
        }
      }
      taken[best] = true;
      top[rank] = best;
    }
    return top;
  }