Code Example #1
  @Override
  public void initialize(Parameters params) throws ResourceInitializationException {
    super.initialize(params);
    try {
      model = Model.load(((ResourceReader) params.get(Constants.MODEL)).getReader());
    } catch (IOException e) {
      throw new ResourceInitializationException("Failed to load SVM model.", e);
    }

    // model.getLabels() already returns a fresh array, so a separate allocation is unnecessary
    labelIndeces = model.getLabels();
    representer = (TextRepresenter) params.get(Constants.REPRESENTER);
  }
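The snippet above loads a serialized liblinear model from a Reader supplied by the surrounding framework. As a minimal, self-contained sketch of the same Model.load(Reader) call using the stock de.bwaldvogel.liblinear API (the file name and feature values below are placeholders, not from the original project):

import java.io.FileReader;
import java.io.IOException;

import de.bwaldvogel.liblinear.Feature;
import de.bwaldvogel.liblinear.FeatureNode;
import de.bwaldvogel.liblinear.Linear;
import de.bwaldvogel.liblinear.Model;

public class LoadAndPredict {
  public static void main(String[] args) throws IOException {
    // Load a previously trained model from a character stream, as in the snippet above.
    Model model = Model.load(new FileReader("svm.model")); // hypothetical path

    // One sparse instance; liblinear feature indices are 1-based and listed in ascending order.
    Feature[] instance = {new FeatureNode(3, 0.5), new FeatureNode(7, 1.0)};

    // Predict the label of the instance with the loaded model.
    double label = Linear.predict(model, instance);
    System.out.println("predicted label: " + label);
  }
}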
Code Example #2
File: ML.java  Project: Shraventhe/InfoRetrieval
  public static void MLalgo() {
    try {
      Problem problem = new Problem();
      problem.l = train_count; // number of training examples
      problem.n = max_feature_count; // number of features
      problem.x = train_matrix; // feature nodes
      problem.y = ylable; // target values

      SolverType solver = SolverType.L2R_LR; // -s 0
      double C = 1.0; // cost of constraints violation
      double eps = 0.01; // stopping criteria

      Parameter parameter = new Parameter(solver, C, eps);
      model = Linear.train(problem, parameter);

      File modelFile = new File("model");
      model.save(modelFile);
      // load model or use it directly
      model = Model.load(modelFile);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
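Since MLalgo trains with SolverType.L2R_LR (logistic regression), the reloaded model can also return per-class probability estimates. A short sketch, assuming the "model" file written above exists and using placeholder feature indices:

import java.io.File;
import java.io.IOException;

import de.bwaldvogel.liblinear.Feature;
import de.bwaldvogel.liblinear.FeatureNode;
import de.bwaldvogel.liblinear.Linear;
import de.bwaldvogel.liblinear.Model;

public class PredictWithProbabilities {
  public static void main(String[] args) throws IOException {
    Model model = Model.load(new File("model")); // file written by MLalgo above

    // Placeholder instance; real feature indices and values depend on the training data.
    Feature[] x = {new FeatureNode(1, 1.0), new FeatureNode(4, 0.5)};

    // predictProbability is only meaningful for logistic-regression solvers such as L2R_LR.
    double[] probabilities = new double[model.getNrClass()];
    double predictedLabel = Linear.predictProbability(model, x, probabilities);

    int[] labels = model.getLabels(); // probabilities[i] corresponds to labels[i]
    for (int i = 0; i < labels.length; i++) {
      System.out.println("P(y = " + labels[i] + ") = " + probabilities[i]);
    }
    System.out.println("predicted label: " + predictedLabel);
  }
}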
Code Example #3
  private void mainClassifierFunction(int option, String trainFile, String testFile, String ddgFile)
      throws IOException {
    // SentimentClassifierHindi this = new SentimentClassifierHindi();
    // int finalSize = this.SentimentClassifierHindi();
    int finalSize = this.generateFeature(option, trainFile, testFile, ddgFile);
    System.out.println("Hello aspectCategorizationSemEval2016!");

    // Create features
    Problem problem = new Problem();

    // Read the target labels (y) from the training-label file
    double a[] = new double[this.trainingFeature.size()];
    File file = new File(rootDirectory + "\\dataset\\trainingLabels.txt");
    BufferedReader reader = new BufferedReader(new FileReader(file));
    String read;
    int count = 0;
    while ((read = reader.readLine()) != null) {
      a[count++] = Double.parseDouble(read);
    }
    reader.close();

    // Feature[][] f = new Feature[][]{ {}, {}, {}, {}, {}, {} };

    // trainingFeature = trainingObject.getList();
    Feature[][] trainFeatureVector = new Feature[trainingFeature.size()][finalSize];

    System.out.println("Training Instances: " + trainingFeature.size());
    System.out.println("Feature Length: " + finalSize);
    System.out.println("Test Instances: " + testFeature.size());

    for (int i = 0; i < trainingFeature.size(); i++) {
      // System.out.println();
      // System.out.println(trainingFeature.get(i));
      System.out.println(i + " trained.");
      for (int j = 0; j < finalSize; j++) {
        // System.out.print(trainingFeature.get(i).get(j + 1)+" ");
        // trainingFeature.get(i).
        if (trainingFeature.get(i).containsKey(j + 1)) {
          // System.out.print(j + 1 + ", ");
          trainFeatureVector[i][j] = new FeatureNode(j + 1, trainingFeature.get(i).get(j + 1));
        } else {
          trainFeatureVector[i][j] = new FeatureNode(j + 1, 0.0);
        }
      }
      // System.out.println();
    }

    problem.l = trainingFeature.size(); // number of training examples
    problem.n = finalSize; // number of features
    problem.x = trainFeatureVector; // feature nodes
    problem.y = a; // target values

    BasicParser bp = new BasicParser();

    SolverType solver = SolverType.L2R_LR; // -s 0
    double C = 0.75; // cost of constraints violation
    double eps = 0.0001; // stopping criteria

    Parameter parameter = new Parameter(solver, C, eps);
    Model model = Linear.train(problem, parameter);
    File modelFile = new File("model");
    model.save(modelFile);

    // PrintWriter write = new PrintWriter(new BufferedWriter(new FileWriter(rootDirectory +
    // "\\dataset\\predictedLabels.txt")));
    PrintWriter write =
        new PrintWriter(
            new BufferedWriter(
                new FileWriter(
                    rootDirectory
                        + "\\dataset\\dataset_aspectCategorization\\predictedHotelsLabels.txt")));

    if (option == 1) {
      BufferedReader trainReader =
          new BufferedReader(
              new FileReader(
                  new File(
                      rootDirectory + "\\dataset\\dataset_aspectCategorization\\" + trainFile)));
      HashMap<String, Integer> id = new HashMap<String, Integer>();
      HashMap<String, String> review = new HashMap<String, String>();
      double[] val = new double[trainingFeature.size()];
      double[] tempVal = new double[trainingFeature.size()];
      LinearCopy.crossValidation(problem, parameter, 5, val, tempVal);
      for (int i = 0; i < trainingFeature.size(); i++) {
        int flag = 0;
        String tokens[] = trainReader.readLine().split("\\|");
        if (id.containsKey(tokens[1]) == true || tokens[2].compareToIgnoreCase("True") == 0) {
        } else {
          // System.out.println(tokens[1]);
          /*int max = -1;
          double probMax = -1.0;
          for(int j=0; j<13; j++){
              if(probMax<val[i][j]){
                  probMax = val[i][j];
                  max = j;
              }
          }*/
          // System.out.println(tempVal[i]);
          write.println((int) (val[i]));
          write.println("next");
          id.put(tokens[1], 1);
          System.out.println(tokens[1] + "\t" + (int) (val[i]));
          if (review.containsKey(tokens[1])) {
            System.out.println(tokens[3]);
            System.out.println(review.get(tokens[1]));
          } else {
            review.put(tokens[1], tokens[3]);
          }
        } /*else{
              for (int j = 0; j < 13; j++) {
                  //System.out.print(val[i][j]+", ");
                  if (val[i] >= 0.185) {
                      flag = 1;
                      //System.out.println("i");
                      write.println(j + 1);
                  }
              }
              if (flag == 1) {
                  write.println("next");
              } else {
                  write.println("-1");
                  write.println("next");
              }
              //write.println(prediction);
              id.put(tokens[1], 1);
              //System.out.println();
          }*/
      }
      write.close();
      return;
    }

    if (option == 3) {
      System.out.println(rootDirectory);
      BufferedReader testReader =
          new BufferedReader(
              new FileReader(
                  new File(
                      rootDirectory + "\\dataset\\dataset_aspectCategorization\\" + testFile)));
      HashMap<String, Integer> id = new HashMap<String, Integer>();
      model = Model.load(modelFile);
      int countNext = 0;
      for (int i = 0; i < testFeature.size(); i++) {
        // System.out.println(i+", "+testFeature.size()+", "+testFeature.get(i).size());
        Feature[] instance = new Feature[testFeature.get(i).size()];
        int j = 0;
        for (Map.Entry<Integer, Double> entry : testFeature.get(i).entrySet()) {
          // System.out.print(entry.getKey() + ": " + entry.getValue() + ";   ");
          // listOfMaps.get(i).put(start + entry.getKey(), entry.getValue());
          // do stuff
          instance[j++] = new FeatureNode(entry.getKey(), entry.getValue());
        }

        // double d = LinearCopy.predict(model, instance);

        // per-class probability estimates; the array must hold at least model.getNrClass() entries
        double[] predict = new double[85];
        double prediction = LinearCopy.predictProbability(model, instance, predict);

        // model.getLabels() returns the label for each probability index, so no pre-allocation is needed
        int[] labelMap = model.getLabels();

        for (int ar = 0; ar < labelMap.length; ar++) {
          System.out.println("********************** " + ar + ": " + labelMap[ar]);
        }

        // System.out.println(prediction);
        // Arrays.sort(predict, Collections.reverseOrder());
        // System.out.println();
        // double prediction = LinearCopy.predict(model, instance);
        String tokens[] = testReader.readLine().split("\\|");
        // System.out.println(tokens[1]);

        int flag = -1;
        if (id.containsKey(tokens[1]) == true || tokens[2].compareToIgnoreCase("True") == 0) {
          flag = 4;
          // System.out.println("OutofScope: "+tokens[1]);
        } else if (tokens[3].compareToIgnoreCase("abc") == 0) {
          flag = 2;
          System.out.println(tokens[1]);
          write.println("-1");
          write.println("next");
          countNext++;
          id.put(tokens[1], 1);
        } else {
          flag = 0;
          for (int p = 0; p < 85; p++) {
            if (predict[p] >= 0.128) {
              flag = 1;
              write.println(labelMap[p]);
            }
          }
          if (flag == 1) {
            countNext++;
            write.println("next");
          } else {
            countNext++;
            write.println("-1");
            write.println("next");
          }

          // write.println((int)d);
          // write.println("next");

          /*write.println(prediction);
          write.println("next");*/
          id.put(tokens[1], 1);
        }

        if (flag == -1) {
          System.out.println("-1,   " + tokens[1]);
        }
      }

      write.close();
      System.out.println("count " + countNext);
    }
    write.close();
  }
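Code Example #3 builds each test instance by iterating a HashMap's entrySet, which does not guarantee ascending feature indices. A small helper sketch (class and method names are hypothetical) that sorts a sparse index-to-value map into the ascending order liblinear's training requires, and which is safest for prediction as well:

import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

import de.bwaldvogel.liblinear.Feature;
import de.bwaldvogel.liblinear.FeatureNode;

public final class SparseInstances {
  // Convert a sparse feature map into a liblinear instance with ascending 1-based indices.
  public static Feature[] toInstance(Map<Integer, Double> sparse) {
    SortedMap<Integer, Double> sorted = new TreeMap<>(sparse);
    Feature[] instance = new Feature[sorted.size()];
    int j = 0;
    for (Map.Entry<Integer, Double> entry : sorted.entrySet()) {
      instance[j++] = new FeatureNode(entry.getKey(), entry.getValue());
    }
    return instance;
  }
}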
Code Example #4
  public void crossValidateFold(int a, int b, int c, int d, boolean skip, double param) {
    System.out.println("Starting cross validation");
    int testSize = wayList.size() / 5;

    List<OSMWay> trainList = new ArrayList<>();
    for (int g = a * testSize; g < b * testSize; g++) { // fold boundaries lie at multiples of testSize
      if (skip) {
        if (g == (c) * testSize) {
          g = (c + 1) * testSize;
        }
      }
      trainList.add(wayList.get(g));
    }

    int wayListSizeWithoutUnclassified = trainList.size();
    int u = 0;
    System.out.println("trainList size: " + wayListSizeWithoutUnclassified);

    // set classes for each osm instance
    // used to properly size the feature arrays, since a way with several class IDs contributes several vectors
    int sizeToBeAddedToArray = 0;
    for (OSMWay way : trainList) {

      OSMClassification classifyInstances = new OSMClassification();
      classifyInstances.calculateClasses(
          way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs);

      if (way.getClassIDs().isEmpty()) {
        wayListSizeWithoutUnclassified -= 1;
        u++;
      } else {
        sizeToBeAddedToArray = sizeToBeAddedToArray + way.getClassIDs().size() - 1;
      }
    }
    double C = param;
    double eps = 0.001;
    double[] GROUPS_ARRAY2 = new double[wayListSizeWithoutUnclassified + sizeToBeAddedToArray];
    FeatureNode[][] trainingSetWithUnknown2 =
        new FeatureNode[wayListSizeWithoutUnclassified + sizeToBeAddedToArray][numberOfFeatures];
    int k = 0;

    for (OSMWay way : trainList) {
      // adding multiple vectors
      int id;
      if (USE_CLASS_FEATURES) {
        ClassFeatures class_vector = new ClassFeatures();
        class_vector.createClassFeatures(
            way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs);
        id = 1422;
      } else {
        id = 1;
      }
      // pass id also: 1422 if using classes, 1 if not
      GeometryFeatures geometryFeatures = new GeometryFeatures(id);
      geometryFeatures.createGeometryFeatures(way);
      id = geometryFeatures.getLastID();
      // id after geometry, cases: all geometry features with mean-variance boolean intervals:
      // id = 1526
      if (USE_RELATION_FEATURES) {
        RelationFeatures relationFeatures = new RelationFeatures(id);
        relationFeatures.createRelationFeatures(way, relationList);
        id = relationFeatures.getLastID();
      } else {
        id = geometryFeatures.getLastID();
      }
      // id 1531

      if (USE_TEXTUAL_FEATURES) {
        TextualFeatures textualFeatures = new TextualFeatures(id, namesList, languageDetector);
        textualFeatures.createTextualFeatures(way);
      }

      List<FeatureNode> featureNodeList = way.getFeatureNodeList();
      FeatureNode[] featureNodeArray = new FeatureNode[featureNodeList.size()];

      if (!way.getClassIDs().isEmpty()) {
        int i = 0;
        for (FeatureNode featureNode : featureNodeList) {
          featureNodeArray[i] = featureNode;
          i++;
        }
        for (int classID : way.getClassIDs()) {
          trainingSetWithUnknown2[k] = featureNodeArray;
          GROUPS_ARRAY2[k] = classID;
          k++;
        }
      }
    }

    // Linear.enableDebugOutput();
    Problem problem = new Problem();
    problem.l = wayListSizeWithoutUnclassified + sizeToBeAddedToArray; // number of training examples
    problem.n = numberOfFeatures; // number of features (the largest feature index)
    problem.x = trainingSetWithUnknown2; // feature nodes
    problem.y = GROUPS_ARRAY2; // target values
    // SolverType solver = SolverType.MCSVM_CS; //Cramer and Singer for multiclass classification -
    // equivalent of SVMlight
    SolverType solver2 =
        SolverType.getById(2); // 2 -- L2-regularized L2-loss support vector classification (primal)

    Parameter parameter = new Parameter(solver2, C, eps);
    // System.out.println("param set ok");
    // System.out.println("number of features: " + vc.getNumOfFeatures());

    long start = System.nanoTime();
    System.out.println("training...");
    PrintStream original = System.out;
    // temporarily silence liblinear's verbose console output during training
    System.setOut(
        new PrintStream(
            new OutputStream() {
              @Override
              public void write(int arg0) throws IOException {}
            }));

    Model model = Linear.train(problem, parameter);
    long end = System.nanoTime();
    long elapsedTime = end - start;
    System.setOut(original);
    System.out.println(
        "training process completed in: " + NANOSECONDS.toSeconds(elapsedTime) + " seconds.");
    // System.out.println("trained");

    // decide model path and naming and/or way of deleting/creating 1 or more models.
    // File inFile = new File(inputFilePath).getParentFile();

    File modelFile;
    if (USE_CLASS_FEATURES) {
      modelFile = new File(modelDirectory.getAbsolutePath() + "/model_with_classes_c=" + param);
    } else {
      modelFile =
          new File(modelDirectory.getAbsolutePath() + "/model_geometries_textual_c=" + param);
    }

    if (modelFile.exists()) {
      modelFile.delete();
    }
    try {
      // System.out.println("file created");
      model.save(modelFile);
      System.out.println("model saved at: " + modelFile);
    } catch (IOException ex) {
      Logger.getLogger(TrainWorker.class.getName()).log(Level.SEVERE, null, ex);
    }

    // end of evaluation training

    // test set
    List<OSMWay> testList = new ArrayList<>();
    for (int g = c * testSize; g < d * testSize; g++) {
      testList.add(wayList.get(g));
      // liblinear test
    }
    System.out.println("testList size: " + testList.size());
    int succededInstances = 0;
    int succededInstances5 = 0;
    int succededInstances10 = 0;
    try {
      model = Model.load(modelFile);
    } catch (IOException ex) {
      Logger.getLogger(TrainWorker.class.getName()).log(Level.SEVERE, null, ex);
    }
    int[] labels = model.getLabels();
    int modelLabelSize = labels.length;
    Map<Integer, Integer> mapLabelsToIDs = new HashMap<>();
    for (int h = 0; h < labels.length; h++) {
      mapLabelsToIDs.put(labels[h], h);
    }

    int wayListSizeWithoutUnclassified2 = testList.size();
    for (OSMWay way : testList) {

      OSMClassification classifyInstances = new OSMClassification();
      classifyInstances.calculateClasses(
          way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs);
      if (way.getClassIDs().isEmpty()) {
        // System.out.println("found unclassified" + way.getClassIDs() + "class: "
        // +way.getClassID());
        wayListSizeWithoutUnclassified2 -= 1;
        // u++;
      }
    }

    FeatureNode[] testInstance2;
    for (OSMWay way : testList) {

      int id;

      if (USE_CLASS_FEATURES) {
        ClassFeatures class_vector = new ClassFeatures();
        class_vector.createClassFeatures(
            way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs);
        id = 1422;
      } else {
        id = 1;
      }

      // pass id also: 1422 if using classes, 1 if not
      GeometryFeatures geometryFeatures = new GeometryFeatures(id);
      geometryFeatures.createGeometryFeatures(way);
      id = geometryFeatures.getLastID();
      // id after geometry, cases: all geometry features with mean-variance boolean intervals:
      // id = 1526
      // System.out.println("id 1526 -> " + geometryFeatures.getLastID());
      if (USE_RELATION_FEATURES) {
        RelationFeatures relationFeatures = new RelationFeatures(id);
        relationFeatures.createRelationFeatures(way, relationList);
        id = relationFeatures.getLastID();
      } else {
        id = geometryFeatures.getLastID();
        // System.out.println("geom feat " + id);
      }
      // id 1531
      // System.out.println("id 1532 -> " + relationFeatures.getLastID());
      if (USE_TEXTUAL_FEATURES) {
        TextualFeatures textualFeatures = new TextualFeatures(id, namesList, languageDetector);
        textualFeatures.createTextualFeatures(way);
      }
      List<FeatureNode> featureNodeList = way.getFeatureNodeList();

      FeatureNode[] featureNodeArray = new FeatureNode[featureNodeList.size()];

      int i = 0;
      for (FeatureNode featureNode : featureNodeList) {
        featureNodeArray[i] = featureNode;
        i++;
      }

      testInstance2 = featureNodeArray;
      // double prediction = Linear.predict(model, testInstance2);
      // System.out.println("test prediction: " + prediction);
      double[] scores = new double[modelLabelSize];
      Linear.predictValues(model, testInstance2, scores);

      // find index of max values in scores array: predicted classes are the elements of these
      // indexes from array model.getlabels
      // iter scores and find 10 max values with their indexes first. then ask those indexes from
      // model.getlabels
      // map each score back to its index; assumes the scores are distinct, otherwise entries collide
      Map<Double, Integer> scoresValues = new HashMap<>();
      for (int h = 0; h < scores.length; h++) {
        scoresValues.put(scores[h], h);
      }

      Arrays.sort(scores);

      // check whether any true class ID appears among the top-1, top-5 and top-10 ranked labels
      boolean inTop1 = false;
      boolean inTop5 = false;
      boolean inTop10 = false;
      for (int r = 1; r <= 10 && r <= scores.length; r++) {
        int predictedLabel = labels[scoresValues.get(scores[scores.length - r])];
        if (way.getClassIDs().contains(predictedLabel)) {
          if (r == 1) {
            inTop1 = true;
          }
          if (r <= 5) {
            inTop5 = true;
          }
          inTop10 = true;
        }
      }
      if (inTop1) {
        succededInstances++;
      }
      if (inTop5) {
        succededInstances5++;
      }
      if (inTop10) {
        succededInstances10++;
      }
      // System.out.println("labels: " + Arrays.toString(model.getLabels()));
      // System.out.println("label[0]: " + model.getLabels()[0]);
    }

    System.out.println(
        "Succeeded "
            + succededInstances
            + " of "
            + testList.size()
            + " total (1 class prediction)");
    double precision1 = succededInstances / (double) wayListSizeWithoutUnclassified2;
    score1 = precision1;
    System.out.println(precision1);

    System.out.println(
        "Succeeded "
            + succededInstances5
            + " of "
            + testList.size()
            + " total (5 class prediction)");
    double precision5 = succededInstances5 / (double) wayListSizeWithoutUnclassified2;
    score5 = precision5;
    System.out.println(precision5);

    System.out.println(
        "Succeeded "
            + succededInstances10
            + " of "
            + testList.size()
            + " total (10 class prediction)");
    double precision10 = succededInstances10 / (double) wayListSizeWithoutUnclassified2;
    score10 = precision10;
    System.out.println(precision10);
  }
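crossValidateFold above slices the folds by hand and scores top-k accuracy. For a plain single-label accuracy estimate, liblinear also ships a built-in k-fold helper; a minimal sketch (class and method names are placeholders, the solver matches the id-2 solver used above):

import de.bwaldvogel.liblinear.Linear;
import de.bwaldvogel.liblinear.Parameter;
import de.bwaldvogel.liblinear.Problem;
import de.bwaldvogel.liblinear.SolverType;

public final class CrossValidationSketch {
  // Run 5-fold cross-validation on an already-populated Problem and return plain accuracy.
  public static double fiveFoldAccuracy(Problem problem, double c, double eps) {
    Parameter parameter = new Parameter(SolverType.L2R_L2LOSS_SVC, c, eps); // solver id 2, as above
    double[] target = new double[problem.l]; // predicted label for every training instance
    Linear.crossValidation(problem, parameter, 5, target);

    int correct = 0;
    for (int i = 0; i < problem.l; i++) {
      if (target[i] == problem.y[i]) {
        correct++;
      }
    }
    return correct / (double) problem.l;
  }
}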