Code example #1
0
  /**
   * Trains the model on the given dataset. Performs full passes over all data points, updating
   * the weight vector for every misclassified point, until a pass completes with no errors.
   *
   * <p>Can be called multiple times.
   *
   * @param labelset one label per data point; a label greater than 0 is the positive class. If
   *     the cardinalities of data- and labelset do not match, a CardinalityException is thrown
   * @param dataset the dataset to train on; each column is treated as one point
   * @throws TrainingException if no separating hyperplane is found within the iteration limit
   */
  public void train(Vector labelset, Matrix dataset) throws TrainingException {
    if (labelset.size() != dataset.columnSize()) {
      throw new CardinalityException(labelset.size(), dataset.columnSize());
    }

    int pass = 0;
    boolean misclassified = true;
    while (misclassified) {
      if (pass > 1000) {
        throw new TrainingException("Too many iterations needed to find hyperplane.");
      }

      misclassified = false;
      int points = dataset.columnSize();
      for (int col = 0; col < points; col++) {
        Vector point = dataset.viewColumn(col);
        log.debug("Training point: {}", point);

        synchronized (this.model) {
          boolean predicted = model.classify(point);
          double label = labelset.get(col);
          // Update only when the prediction disagrees with the sign of the label.
          if (predicted != label > 0) {
            log.debug("updating");
            misclassified = true;
            update(label, point, this.model);
          }
        }
      }
      pass++;
    }
  }
Code example #2
0
  /**
   * Scores a document against a query as a linear combination of per-field query/document dot
   * products (URL, title, body, header, anchor) and the earliest query-term position in the
   * document.
   *
   * @param doc the document to score
   * @param query the query to score the document against
   * @param rModel the ranking model supplying the learned feature weights
   * @return the weighted sum of the extracted features, or 0 if the linear model is missing one
   *     of the expected weight fields
   */
  @Override
  public double getScore(DocData doc, QueryData query, RankingModel rModel) {

    HashMap<String, Double> featureWeights = rModel.FeatureWeights;

    // NOTE(review): mutates static state shared by all scorers — confirm this is intended.
    TermFreqUtil.enableStemming = true;

    List<String> termVector = new ArrayList<>();

    List<Double> queryVector =
        Vectorizer.getQueryVector(
            query.GetQueryString(),
            termVector,
            TermSmoothingType.LOGARITHM,
            DocSmoothingType.IDF,
            rModel);

    Map<DocFeature, List<Double>> docVector =
        Vectorizer.getDocumentVector(
            doc,
            termVector,
            TermSmoothingType.LOGARITHM,
            DocSmoothingType.NONE,
            NormalizationType.LENGTH,
            rModel);

    // One dot-product feature per document field, plus the first-position feature.
    HashMap<String, Double> features = new HashMap<>();
    features.put("task4_W_url", VectorUtil.dot(queryVector, docVector.get(DocFeature.URL)));
    features.put("task4_W_title", VectorUtil.dot(queryVector, docVector.get(DocFeature.TITLE)));
    features.put("task4_W_body", VectorUtil.dot(queryVector, docVector.get(DocFeature.BODY)));
    features.put("task4_W_header", VectorUtil.dot(queryVector, docVector.get(DocFeature.HEADER)));
    features.put("task4_W_anchor", VectorUtil.dot(queryVector, docVector.get(DocFeature.ANCHOR)));
    features.put("task4_W_first_pos", getFirstPos(doc, query));

    try {
      return LinearModel.CalculateScore(features, featureWeights);
    } catch (NoSuchFieldException e) {
      // Best-effort fallback: preserve the original contract of returning 0 on a missing field.
      e.printStackTrace();
      return 0;
    }
  }