/**
 * Initializes training. Runs through all data points in the training set and updates the weight
 * vector whenever a classification error occurs.
 *
 * <p>Can be called multiple times.
 *
 * @param labelset the set of labels, one for each data point
 * @param dataset the dataset to train on; each column is treated as a point
 * @throws TrainingException if no separating hyperplane is found within the iteration cap
 */
public void train(Vector labelset, Matrix dataset) throws TrainingException {
  if (labelset.size() != dataset.columnSize()) {
    // Every column (data point) needs exactly one label.
    throw new CardinalityException(labelset.size(), dataset.columnSize());
  }

  int pass = 0;
  boolean sawError = true;
  // Keep sweeping the dataset until a full pass produces no updates.
  while (sawError) {
    if (pass > 1000) {
      // Give up rather than loop forever on non-separable data.
      throw new TrainingException("Too many iterations needed to find hyperplane.");
    }
    sawError = false;

    int columnCount = dataset.columnSize();
    for (int col = 0; col < columnCount; col++) {
      Vector point = dataset.viewColumn(col);
      log.debug("Training point: {}", point);

      // Guard the shared model while classifying and (possibly) updating it.
      synchronized (this.model) {
        boolean predictedPositive = model.classify(point);
        double label = labelset.get(col);
        boolean actuallyPositive = label > 0;
        if (predictedPositive != actuallyPositive) {
          log.debug("updating");
          sawError = true;
          update(label, point, this.model);
        }
      }
    }
    pass++;
  }
}
/**
 * Scores a document against a query as a weighted linear combination of per-field
 * similarity features (URL, title, body, header, anchor) plus a first-position feature.
 *
 * @param Doc the document to score
 * @param query the query to score the document against
 * @param rModel the ranking model supplying the learned feature weights
 * @return the linear-model score for this (document, query) pair
 * @throws IllegalStateException if the model is missing a weight for one of the features
 */
@Override
public double getScore(DocData Doc, QueryData query, RankingModel rModel) {
  HashMap<String, Double> featureWeights = rModel.FeatureWeights;
  // NOTE(review): mutates shared global state; presumably every scorer wants
  // stemming enabled, but this affects other callers too — confirm.
  TermFreqUtil.enableStemming = true;

  // Build the query vector; termVector is filled in as a side effect and then
  // reused so the document vectors share the same term ordering.
  List<String> termVector = new ArrayList<String>();
  List<Double> queryVector =
      Vectorizer.getQueryVector(
          query.GetQueryString(),
          termVector,
          TermSmoothingType.LOGARITHM,
          DocSmoothingType.IDF,
          rModel);
  Map<DocFeature, List<Double>> docVector =
      Vectorizer.getDocumentVector(
          Doc,
          termVector,
          TermSmoothingType.LOGARITHM,
          DocSmoothingType.NONE,
          NormalizationType.LENGTH,
          rModel);

  // One dot product per document field; keys must match the weight names in rModel.
  HashMap<String, Double> features = new HashMap<String, Double>();
  features.put("task4_W_url", VectorUtil.dot(queryVector, docVector.get(DocFeature.URL)));
  features.put("task4_W_title", VectorUtil.dot(queryVector, docVector.get(DocFeature.TITLE)));
  features.put("task4_W_body", VectorUtil.dot(queryVector, docVector.get(DocFeature.BODY)));
  features.put("task4_W_header", VectorUtil.dot(queryVector, docVector.get(DocFeature.HEADER)));
  features.put("task4_W_anchor", VectorUtil.dot(queryVector, docVector.get(DocFeature.ANCHOR)));
  features.put("task4_W_first_pos", getFirstPos(Doc, query));

  try {
    return LinearModel.CalculateScore(features, featureWeights);
  } catch (NoSuchFieldException e) {
    // A missing feature weight is a model-configuration error. Previously this was
    // swallowed (printStackTrace + return 0), silently ranking the document as if it
    // scored zero; surface it instead, preserving the cause.
    throw new IllegalStateException("Ranking model is missing a feature weight", e);
  }
}