Ejemplo n.º 1
0
  /** Sort results to identify potential keywords. */
  public void sortResults(final long max_results) {
    Arrays.sort(
        nodeList,
        new Comparator<Node>() {
          public int compare(Node n1, Node n2) {
            if (n1.rank > n2.rank) {
              return -1;
            } else if (n1.rank < n2.rank) {
              return 1;
            } else {
              return 0;
            }
          }
        });

    // mark the top-ranked nodes
    distStats.clear();

    for (int i = 0; i < nodeList.length; i++) {
      final Node n1 = nodeList[i];

      if (i <= max_results) {
        n1.marked = true;
        distStats.addValue(n1.rank);
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug("n: " + n1.key + " " + n1.rank + " " + n1.marked);

        for (Node n2 : n1.edges) {
          LOG.debug(" - " + n2.key);
        }
      }
    }
  }
Ejemplo n.º 2
0
 /**
  * Calculates the average, min ad max throughput time out of the throughput times of all traces in
  * piList. Next to this, the arrival rate is calculated. All metrics are based on the process
  * instances in piList only
  *
  * @param piList ArrayList: the process instances used
  * @param fitOption int: the fit option used (how to deal with non-conformance)
  * @throws Exception
  */
 public void calculateMetrics(ArrayList piList, int fitOption) throws Exception {
   properFrequency = 0;
   timeStats.clear();
   arrivalStats.clear();
   ArrayList arrivalDates = new ArrayList();
   ListIterator lit = piList.listIterator();
   while (lit.hasNext()) {
     ExtendedLogTrace currentTrace = (ExtendedLogTrace) lit.next();
     if (currentTrace.hasProperlyTerminated() && currentTrace.hasSuccessfullyExecuted()) {
       properFrequency++;
     }
     try {
       long tp = (currentTrace.getEndDate().getTime() - currentTrace.getBeginDate().getTime());
       if (fitOption == 0) {
         // timeStats based on all traces
         timeStats.addValue(tp);
         arrivalDates.add(currentTrace.getBeginDate());
       }
       if (currentTrace.hasProperlyTerminated() && currentTrace.hasSuccessfullyExecuted()) {
         if (fitOption == 1) {
           // timeStats based on fitting traces only
           timeStats.addValue(tp);
           arrivalDates.add(currentTrace.getBeginDate());
         }
       }
     } catch (NullPointerException ex) {
       ex.printStackTrace();
     }
   }
   Date[] arrivals = (Date[]) arrivalDates.toArray(new Date[0]);
   // make sure arrivaldates are sorted
   Arrays.sort(arrivals);
   if (arrivals.length > 1) {
     for (int i = 1; i < arrivals.length; i++) {
       long t1 = arrivals[i].getTime();
       long t2 = arrivals[i - 1].getTime();
       long iat = arrivals[i].getTime() - arrivals[i - 1].getTime();
       if (iat >= 0) {
         arrivalStats.addValue(iat);
       }
     }
   }
 }
Ejemplo n.º 3
0
  /** Iterate through the graph, calculating rank. */
  protected void iterateGraph(final int max_iterations) {
    final double[] rankList = new double[nodeList.length];

    // either run through N iterations, or until the standard
    // error converges below a threshold
    for (int k = 0; k < max_iterations; k++) {
      distStats.clear();

      // calculate the next rank for each node
      for (int i = 0; i < nodeList.length; i++) {
        final Node n1 = nodeList[i];
        double rank = 0.0D;

        for (Node n2 : n1.edges) {
          rank += n2.rank / (double) n2.edges.size();
        }

        rank *= TEXTRANK_DAMPING_FACTOR;
        rank += 1.0D - TEXTRANK_DAMPING_FACTOR;

        rankList[i] = rank;
        distStats.addValue(Math.abs(n1.rank - rank));
        // System.out.println("node : " + n1.key + " rank : " + Math.abs((n1.rank - rank)));
      }

      final double standard_error =
          distStats.getStandardDeviation() / Math.sqrt((double) distStats.getN());

      // swap in new rank values
      for (int i = 0; i < nodeList.length; i++) {
        nodeList[i].rank = rankList[i];
      }

      if (standard_error < STANDARD_ERROR_THRESHOLD) {
        break;
      }
    }
  }
Ejemplo n.º 4
0
  /**
   * Calculates the rank weighting scores for all the nodes in the graph. Iterative calculates over
   * the graph until convergence at the standard error threshold or until max iterations.
   *
   * @param maxIterations Max number of iterations allowed for calculating rank scores
   * @param language Language of the text to calculate rank weighting scores for Available
   *     languages: Icelandic and English
   */
  public void weigthingScore(int maxIterations, Language language) {

    LinkedList<Node> nodes = new LinkedList<Node>();

    // Add nodes to LinkedList, we need them to stay in order
    for (int i = 0; i < nodeList.length; i++) {
      nodes.add(nodeList[i]);
    }

    /*

    WS(Vi) = ( 1 - d) + d * Sum(VjIn)  ________Wij________ * WS(Vj)
    									  Sum(Vk outVj) Wjk

    */
    for (int k = 0; k < maxIterations; k++) {
      distStats.clear();

      // Use dynamic programming to calculate the scores
      double previousWSScore[] = new double[nodes.size()];

      // Read in scores already calculated for nodes
      for (Node s : nodeList) {
        previousWSScore[nodes.indexOf(s)] = s.rank;
      }

      // For all nodes in the graph
      for (Node sentence_i : nodes) {
        double resultSumVji = 0;

        // For all in-coming edges of Vi
        for (Node sentence_j : sentence_i.edgesIN) {

          // Do not compare a sentence to it self, we do not allow self voting here
          if (!sentence_j.value.text.equalsIgnoreCase(sentence_i.value.text)) {

            // Calculate the sum of all similarity measurements
            // from all Vj nodes with outgoing edges to Vk nodes, see Wjk in equation
            double sumWjk = getSumWjk(sentence_j, language);

            if (sumWjk != 0) {
              double Wji = 0.0;
              if (language.equals(Language.ICELANDIC)) {
                // Calculate Wij, similarity between two sentences
                Wji = sentence_i.similarity(sentence_j);

              } else if (language.equals(Language.ENGLISH)) {
                // Calculate Wij, similarity between two sentences
                Wji = sentence_i.similarityEN(sentence_j);
              }
              // Get the score for the previous node
              double WSVj = previousWSScore[nodes.indexOf(sentence_j)];

              // Sum all (j in Vj)
              resultSumVji += ((Wji / sumWjk) * WSVj);
            }
          }
        }
        // Calculate weighting score WS(Vi)
        double WSVi = (1.0 - TEXTRANK_DAMPING_FACTOR) + TEXTRANK_DAMPING_FACTOR * resultSumVji;
        distStats.addValue(Math.abs(sentence_i.rank - WSVi));
        sentence_i.rank = WSVi;
      }
      // Calculate the Standard Error of the Mean
      final double standard_error =
          distStats.getStandardDeviation() / Math.sqrt((double) distStats.getN());

      // if std error of the mean is less than threshold
      // the graph has converged and we break
      if (standard_error < STANDARD_ERROR_THRESHOLD) {
        break;
      }
    }
  }