Beispiel #1
0
  public static void MLalgo() {
    try {
      Problem problem = new Problem();
      problem.l = train_count; // number of training examples
      problem.n = max_feature_count; // number of features
      problem.x = train_matrix; // feature nodes
      problem.y = ylable; // target values;

      SolverType solver = SolverType.L2R_LR; // -s 0
      double C = 1.0; // cost of constraints violation
      double eps = 0.01; // stopping criteria

      Parameter parameter = new Parameter(solver, C, eps);
      model = Linear.train(problem, parameter);

      File modelFile = new File("model");
      model.save(modelFile);
      // load model or use it directly
      model = Model.load(modelFile);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
  private void mainClassifierFunction(int option, String trainFile, String testFile, String ddgFile)
      throws IOException {
    // SentimentClassifierHindi this = new SentimentClassifierHindi();
    // int finalSize = this.SentimentClassifierHindi();
    int finalSize = this.generateFeature(option, trainFile, testFile, ddgFile);
    System.out.println("Hello aspectCategorizationSemEval2016!");

    // Create features
    Problem problem = new Problem();

    // Save X to problem
    double a[] = new double[this.trainingFeature.size()];
    File file = new File(rootDirectory + "\\dataset\\trainingLabels.txt");
    BufferedReader reader = new BufferedReader(new FileReader(file));
    String read;
    int count = 0;
    while ((read = reader.readLine()) != null) {
      // System.out.println(read);
      a[count++] = Double.parseDouble(read.toString());
    }

    // Feature[][] f = new Feature[][]{ {}, {}, {}, {}, {}, {} };

    // trainingFeature = trainingObject.getList();
    Feature[][] trainFeatureVector = new Feature[trainingFeature.size()][finalSize];

    System.out.println("Training Instances: " + trainingFeature.size());
    System.out.println("Feature Length: " + finalSize);
    System.out.println("Test Instances: " + testFeature.size());

    for (int i = 0; i < trainingFeature.size(); i++) {
      // System.out.println();
      // System.out.println(trainingFeature.get(i));
      System.out.println(i + " trained.");
      for (int j = 0; j < finalSize; j++) {
        // System.out.print(trainingFeature.get(i).get(j + 1)+" ");
        // trainingFeature.get(i).
        if (trainingFeature.get(i).containsKey(j + 1)) {
          // System.out.print(j + 1 + ", ");
          trainFeatureVector[i][j] = new FeatureNode(j + 1, trainingFeature.get(i).get(j + 1));
        } else {
          trainFeatureVector[i][j] = new FeatureNode(j + 1, 0.0);
        }
      }
      // System.out.println();
    }

    problem.l = trainingFeature.size(); // number of training examples
    problem.n = finalSize; // number of features
    problem.x = trainFeatureVector; // feature nodes
    problem.y = a; // target values ----

    BasicParser bp = new BasicParser();

    SolverType solver = SolverType.L2R_LR; // -s 7
    double C = 0.75; // cost of constraints violation
    double eps = 0.0001; // stopping criteria

    Parameter parameter = new Parameter(solver, C, eps);
    Model model = Linear.train(problem, parameter);
    File modelFile = new File("model");
    model.save(modelFile);

    // PrintWriter write = new PrintWriter(new BufferedWriter(new FileWriter(rootDirectory +
    // "\\dataset\\predictedLabels.txt")));
    PrintWriter write =
        new PrintWriter(
            new BufferedWriter(
                new FileWriter(
                    rootDirectory
                        + "\\dataset\\dataset_aspectCategorization\\predictedHotelsLabels.txt")));

    if (option == 1) {
      BufferedReader trainReader =
          new BufferedReader(
              new FileReader(
                  new File(
                      rootDirectory + "\\dataset\\dataset_aspectCategorization\\" + trainFile)));
      HashMap<String, Integer> id = new HashMap<String, Integer>();
      HashMap<String, String> review = new HashMap<String, String>();
      double[] val = new double[trainingFeature.size()];
      double[] tempVal = new double[trainingFeature.size()];
      LinearCopy.crossValidation(problem, parameter, 5, val, tempVal);
      for (int i = 0; i < trainingFeature.size(); i++) {
        int flag = 0;
        String tokens[] = trainReader.readLine().split("\\|");
        if (id.containsKey(tokens[1]) == true || tokens[2].compareToIgnoreCase("True") == 0) {
        } else {
          // System.out.println(tokens[1]);
          /*int max = -1;
          double probMax = -1.0;
          for(int j=0; j<13; j++){
              if(probMax<val[i][j]){
                  probMax = val[i][j];
                  max = j;
              }
          }*/
          // System.out.println(tempVal[i]);
          write.println((int) (val[i]));
          write.println("next");
          id.put(tokens[1], 1);
          System.out.println(tokens[1] + "\t" + (int) (val[i]));
          if (review.containsKey(tokens[1])) {
            System.out.println(tokens[3]);
            System.out.println(review.get(tokens[1]));
          } else {
            review.put(tokens[1], tokens[3]);
          }
        } /*else{
              for (int j = 0; j < 13; j++) {
                  //System.out.print(val[i][j]+", ");
                  if (val[i] >= 0.185) {
                      flag = 1;
                      //System.out.println("i");
                      write.println(j + 1);
                  }
              }
              if (flag == 1) {
                  write.println("next");
              } else {
                  write.println("-1");
                  write.println("next");
              }
              //write.println(prediction);
              id.put(tokens[1], 1);
              //System.out.println();
          }*/
      }
      write.close();
      return;
    }

    if (option == 3) {
      System.out.println(rootDirectory);
      BufferedReader testReader =
          new BufferedReader(
              new FileReader(
                  new File(
                      rootDirectory + "\\dataset\\dataset_aspectCategorization\\" + testFile)));
      HashMap<String, Integer> id = new HashMap<String, Integer>();
      model = Model.load(modelFile);
      int countNext = 0;
      for (int i = 0; i < testFeature.size(); i++) {
        // System.out.println(i+", "+testFeature.size()+", "+testFeature.get(i).size());
        Feature[] instance = new Feature[testFeature.get(i).size()];
        int j = 0;
        for (Map.Entry<Integer, Double> entry : testFeature.get(i).entrySet()) {
          // System.out.print(entry.getKey() + ": " + entry.getValue() + ";   ");
          // listOfMaps.get(i).put(start + entry.getKey(), entry.getValue());
          // do stuff
          instance[j++] = new FeatureNode(entry.getKey(), entry.getValue());
        }

        // double d = LinearCopy.predict(model, instance);

        double[] predict = new double[85];
        double prediction = LinearCopy.predictProbability(model, instance, predict);

        int labelMap[] = new int[13];
        labelMap = model.getLabels();

        for (int ar = 0; ar < labelMap.length; ar++) {
          System.out.println("********************** " + ar + ": " + labelMap[ar]);
        }

        // System.out.println(prediction);
        // Arrays.sort(predict, Collections.reverseOrder());
        // System.out.println();
        // double prediction = LinearCopy.predict(model, instance);
        String tokens[] = testReader.readLine().split("\\|");
        // System.out.println(tokens[1]);

        int flag = -1;
        if (id.containsKey(tokens[1]) == true || tokens[2].compareToIgnoreCase("True") == 0) {
          flag = 4;
          // System.out.println("OutofScope: "+tokens[1]);
        } else if (tokens[3].compareToIgnoreCase("abc") == 0) {
          flag = 2;
          System.out.println(tokens[1]);
          write.println("-1");
          write.println("next");
          countNext++;
          id.put(tokens[1], 1);
        } else {
          flag = 0;
          for (int p = 0; p < 85; p++) {
            if (predict[p] >= 0.128) {
              flag = 1;
              write.println(labelMap[p]);
            }
          }
          if (flag == 1) {
            countNext++;
            write.println("next");
          } else {
            countNext++;
            write.println("-1");
            write.println("next");
          }

          // write.println((int)d);
          // write.println("next");

          /*write.println(prediction);
          write.println("next");*/
          id.put(tokens[1], 1);
        }

        if (flag == -1) {
          System.out.println("-1,   " + tokens[1]);
        }
      }

      write.close();
      System.out.println("count " + countNext);
    }
    write.close();
  }