Exemplo n.º 1
0
  /**
   * Sums the similarity between sentence Vj and every sentence Vk reachable over one of Vj's
   * outgoing edges. This is the denominator (Sum of Wjk) used when normalising an edge weight.
   *
   * <p>The similarity measure is picked by language: {@code similarity} for Icelandic and
   * {@code similarityEN} for English. For any other language no edge is visited and the result is 0.
   *
   * @param sentence_j Current sentence node Vj.
   * @param language Language of the text to be compared
   * @return sum of the similarities between Vj and all sentences Vk that Vj has outgoing edges to,
   *     or 0 when the language is neither Icelandic nor English.
   */
  private double getSumWjk(Node sentence_j, Language language) {
    // Resolve the language once instead of duplicating the loop per branch
    final boolean icelandic = language.equals(Language.ICELANDIC);
    final boolean english = !icelandic && language.equals(Language.ENGLISH);

    // Unsupported language: nothing to sum
    if (!icelandic && !english) {
      return 0;
    }

    double total = 0;
    for (Node neighbour : sentence_j.edgesOUT) {
      total += icelandic ? sentence_j.similarity(neighbour) : sentence_j.similarityEN(neighbour);
    }
    return total;
  }
Exemplo n.º 2
0
  /**
   * Calculates the rank weighting score for every node in the graph.
   *
   * <p>Each pass applies the weighted update
   *
   * <pre>
   *   WS(Vi) = (1 - d) + d * Sum[Vj in In(Vi)] ( Wji / Sum[Vk in Out(Vj)] Wjk ) * WS(Vj)
   * </pre>
   *
   * where d is {@code TEXTRANK_DAMPING_FACTOR} and every WS(Vj) on the right-hand side is the score
   * from the previous pass. Passes repeat until the standard error of the mean of the absolute
   * score changes drops below {@code STANDARD_ERROR_THRESHOLD}, or until {@code maxIterations}
   * passes have run. The resulting score is stored in each node's {@code rank} field, and
   * {@code distStats} is left holding the score changes of the last pass.
   *
   * @param maxIterations Max number of iterations allowed for calculating rank scores
   * @param language Language of the text to calculate rank weighting scores for. Available
   *     languages: Icelandic and English
   */
  public void weigthingScore(int maxIterations, Language language) {
    final int nodeCount = nodeList.length;

    // New scores are staged here during a pass and written back only once the pass is complete.
    // Until then every node's rank field still holds the previous pass's score, so it can be read
    // directly instead of being looked up by position in a copy of the node list.
    final double[] stagedScores = new double[nodeCount];

    for (int iteration = 0; iteration < maxIterations; iteration++) {
      distStats.clear();

      // With no nodes there is nothing to score and no score change to test for convergence
      if (nodeCount == 0) {
        break;
      }

      // For all nodes in the graph
      for (int i = 0; i < nodeCount; i++) {
        final Node sentence_i = nodeList[i];
        double resultSumVji = 0;

        // For all in-coming edges of Vi
        for (Node sentence_j : sentence_i.edgesIN) {

          // Do not compare a sentence to itself, self voting is not allowed
          if (sentence_j.value.text.equalsIgnoreCase(sentence_i.value.text)) {
            continue;
          }

          // Sum of the similarities from Vj to all nodes Vk it points to, see Wjk in the equation
          final double sumWjk = getSumWjk(sentence_j, language);

          // Vj contributes nothing when its outgoing weights sum to zero (avoids dividing by 0)
          if (sumWjk == 0) {
            continue;
          }

          // Wji, the similarity between the two sentences; stays 0 for an unsupported language
          double Wji = 0.0;
          if (language.equals(Language.ICELANDIC)) {
            Wji = sentence_i.similarity(sentence_j);
          } else if (language.equals(Language.ENGLISH)) {
            Wji = sentence_i.similarityEN(sentence_j);
          }

          // sentence_j.rank is still the previous pass's score, see stagedScores above
          resultSumVji += (Wji / sumWjk) * sentence_j.rank;
        }

        // Calculate weighting score WS(Vi)
        final double WSVi =
            (1.0 - TEXTRANK_DAMPING_FACTOR) + TEXTRANK_DAMPING_FACTOR * resultSumVji;
        distStats.addValue(Math.abs(sentence_i.rank - WSVi));
        stagedScores[i] = WSVi;
      }

      // The pass is complete: publish the new scores
      for (int i = 0; i < nodeCount; i++) {
        nodeList[i].rank = stagedScores[i];
      }

      // Calculate the Standard Error of the Mean of the score changes
      final double standardError =
          distStats.getStandardDeviation() / Math.sqrt((double) distStats.getN());

      // If the standard error of the mean is below the threshold the graph has converged
      if (standardError < STANDARD_ERROR_THRESHOLD) {
        break;
      }
    }
  }