/**
 * Sums the similarity between sentence Vj and every sentence Vk that Vj has an outgoing edge to.
 *
 * @param sentence_j node Vj whose outgoing edges are walked
 * @param language selects the similarity measure: {@code similarity} for Icelandic,
 *     {@code similarityEN} for English
 * @return the accumulated similarity over all outgoing edges of Vj; 0 when the language is
 *     neither Icelandic nor English
 */
private double getSumWjk(Node sentence_j, Language language) {
  final boolean icelandic = language.equals(Language.ICELANDIC);
  // The English check is only consulted when the Icelandic one did not match.
  if (!icelandic && !language.equals(Language.ENGLISH)) {
    return 0;
  }

  double total = 0;
  for (Node sentence_k : sentence_j.edgesOUT) {
    if (icelandic) {
      total += sentence_j.similarity(sentence_k);
    } else {
      total += sentence_j.similarityEN(sentence_k);
    }
  }
  return total;
}
/** * Calculates the rank weighting scores for all the nodes in the graph. Iterative calculates over * the graph until convergence at the standard error threshold or until max iterations. * * @param maxIterations Max number of iterations allowed for calculating rank scores * @param language Language of the text to calculate rank weighting scores for Available * languages: Icelandic and English */ public void weigthingScore(int maxIterations, Language language) { LinkedList<Node> nodes = new LinkedList<Node>(); // Add nodes to LinkedList, we need them to stay in order for (int i = 0; i < nodeList.length; i++) { nodes.add(nodeList[i]); } /* WS(Vi) = ( 1 - d) + d * Sum(VjIn) ________Wij________ * WS(Vj) Sum(Vk outVj) Wjk */ for (int k = 0; k < maxIterations; k++) { distStats.clear(); // Use dynamic programming to calculate the scores double previousWSScore[] = new double[nodes.size()]; // Read in scores already calculated for nodes for (Node s : nodeList) { previousWSScore[nodes.indexOf(s)] = s.rank; } // For all nodes in the graph for (Node sentence_i : nodes) { double resultSumVji = 0; // For all in-coming edges of Vi for (Node sentence_j : sentence_i.edgesIN) { // Do not compare a sentence to it self, we do not allow self voting here if (!sentence_j.value.text.equalsIgnoreCase(sentence_i.value.text)) { // Calculate the sum of all similarity measurements // from all Vj nodes with outgoing edges to Vk nodes, see Wjk in equation double sumWjk = getSumWjk(sentence_j, language); if (sumWjk != 0) { double Wji = 0.0; if (language.equals(Language.ICELANDIC)) { // Calculate Wij, similarity between two sentences Wji = sentence_i.similarity(sentence_j); } else if (language.equals(Language.ENGLISH)) { // Calculate Wij, similarity between two sentences Wji = sentence_i.similarityEN(sentence_j); } // Get the score for the previous node double WSVj = previousWSScore[nodes.indexOf(sentence_j)]; // Sum all (j in Vj) resultSumVji += ((Wji / sumWjk) * WSVj); } } } // Calculate 
weighting score WS(Vi) double WSVi = (1.0 - TEXTRANK_DAMPING_FACTOR) + TEXTRANK_DAMPING_FACTOR * resultSumVji; distStats.addValue(Math.abs(sentence_i.rank - WSVi)); sentence_i.rank = WSVi; } // Calculate the Standard Error of the Mean final double standard_error = distStats.getStandardDeviation() / Math.sqrt((double) distStats.getN()); // if std error of the mean is less than threshold // the graph has converged and we break if (standard_error < STANDARD_ERROR_THRESHOLD) { break; } } }