/** Sort results to identify potential keywords. */ public void sortResults(final long max_results) { Arrays.sort( nodeList, new Comparator<Node>() { public int compare(Node n1, Node n2) { if (n1.rank > n2.rank) { return -1; } else if (n1.rank < n2.rank) { return 1; } else { return 0; } } }); // mark the top-ranked nodes distStats.clear(); for (int i = 0; i < nodeList.length; i++) { final Node n1 = nodeList[i]; if (i <= max_results) { n1.marked = true; distStats.addValue(n1.rank); } if (LOG.isDebugEnabled()) { LOG.debug("n: " + n1.key + " " + n1.rank + " " + n1.marked); for (Node n2 : n1.edges) { LOG.debug(" - " + n2.key); } } } }
/** * Calculates the average, min ad max throughput time out of the throughput times of all traces in * piList. Next to this, the arrival rate is calculated. All metrics are based on the process * instances in piList only * * @param piList ArrayList: the process instances used * @param fitOption int: the fit option used (how to deal with non-conformance) * @throws Exception */ public void calculateMetrics(ArrayList piList, int fitOption) throws Exception { properFrequency = 0; timeStats.clear(); arrivalStats.clear(); ArrayList arrivalDates = new ArrayList(); ListIterator lit = piList.listIterator(); while (lit.hasNext()) { ExtendedLogTrace currentTrace = (ExtendedLogTrace) lit.next(); if (currentTrace.hasProperlyTerminated() && currentTrace.hasSuccessfullyExecuted()) { properFrequency++; } try { long tp = (currentTrace.getEndDate().getTime() - currentTrace.getBeginDate().getTime()); if (fitOption == 0) { // timeStats based on all traces timeStats.addValue(tp); arrivalDates.add(currentTrace.getBeginDate()); } if (currentTrace.hasProperlyTerminated() && currentTrace.hasSuccessfullyExecuted()) { if (fitOption == 1) { // timeStats based on fitting traces only timeStats.addValue(tp); arrivalDates.add(currentTrace.getBeginDate()); } } } catch (NullPointerException ex) { ex.printStackTrace(); } } Date[] arrivals = (Date[]) arrivalDates.toArray(new Date[0]); // make sure arrivaldates are sorted Arrays.sort(arrivals); if (arrivals.length > 1) { for (int i = 1; i < arrivals.length; i++) { long t1 = arrivals[i].getTime(); long t2 = arrivals[i - 1].getTime(); long iat = arrivals[i].getTime() - arrivals[i - 1].getTime(); if (iat >= 0) { arrivalStats.addValue(iat); } } } }
/** Iterate through the graph, calculating rank. */ protected void iterateGraph(final int max_iterations) { final double[] rankList = new double[nodeList.length]; // either run through N iterations, or until the standard // error converges below a threshold for (int k = 0; k < max_iterations; k++) { distStats.clear(); // calculate the next rank for each node for (int i = 0; i < nodeList.length; i++) { final Node n1 = nodeList[i]; double rank = 0.0D; for (Node n2 : n1.edges) { rank += n2.rank / (double) n2.edges.size(); } rank *= TEXTRANK_DAMPING_FACTOR; rank += 1.0D - TEXTRANK_DAMPING_FACTOR; rankList[i] = rank; distStats.addValue(Math.abs(n1.rank - rank)); // System.out.println("node : " + n1.key + " rank : " + Math.abs((n1.rank - rank))); } final double standard_error = distStats.getStandardDeviation() / Math.sqrt((double) distStats.getN()); // swap in new rank values for (int i = 0; i < nodeList.length; i++) { nodeList[i].rank = rankList[i]; } if (standard_error < STANDARD_ERROR_THRESHOLD) { break; } } }
/**
 * Calculates the rank weighting scores for all the nodes in the graph. Iteratively calculates
 * over the graph until convergence at the standard error threshold or until max iterations.
 *
 * @param maxIterations Max number of iterations allowed for calculating rank scores
 * @param language Language of the text to calculate rank weighting scores for. Available
 *     languages: Icelandic and English
 */
public void weigthingScore(int maxIterations, Language language) {
  LinkedList<Node> nodes = new LinkedList<Node>();

  // Add nodes to LinkedList; we need them to stay in order so that
  // nodes.indexOf(node) can serve as a stable array index below.
  for (int i = 0; i < nodeList.length; i++) {
    nodes.add(nodeList[i]);
  }

  /* WS(Vi) = ( 1 - d) + d * Sum(VjIn)  ________Wij________ * WS(Vj)
                                        Sum(Vk outVj)  Wjk                 */
  for (int k = 0; k < maxIterations; k++) {
    distStats.clear();

    // Snapshot of each node's score from the previous iteration, so all
    // WS(Vj) reads within one iteration see a consistent state.
    double previousWSScore[] = new double[nodes.size()];

    // Read in scores already calculated for nodes.
    for (Node s : nodeList) {
      previousWSScore[nodes.indexOf(s)] = s.rank;
    }

    // For all nodes (sentences) in the graph, compute the new score WS(Vi).
    for (Node sentence_i : nodes) {
      double resultSumVji = 0;

      // For all in-coming edges of Vi.
      for (Node sentence_j : sentence_i.edgesIN) {
        // Do not compare a sentence to itself; we do not allow self voting here.
        if (!sentence_j.value.text.equalsIgnoreCase(sentence_i.value.text)) {
          // Calculate the sum of all similarity measurements from all Vj nodes
          // with outgoing edges to Vk nodes, see Wjk in the equation above.
          double sumWjk = getSumWjk(sentence_j, language);

          // Skip Vj with zero outgoing weight to avoid division by zero.
          if (sumWjk != 0) {
            double Wji = 0.0;

            if (language.equals(Language.ICELANDIC)) {
              // Calculate Wij, similarity between two sentences (Icelandic).
              Wji = sentence_i.similarity(sentence_j);
            } else if (language.equals(Language.ENGLISH)) {
              // Calculate Wij, similarity between two sentences (English).
              Wji = sentence_i.similarityEN(sentence_j);
            }

            // Get the score for the previous node from the snapshot.
            double WSVj = previousWSScore[nodes.indexOf(sentence_j)];

            // Sum all (j in Vj).
            resultSumVji += ((Wji / sumWjk) * WSVj);
          }
        }
      }

      // Calculate weighting score WS(Vi) with the damping factor, then record
      // the change from the previous rank for the convergence check.
      double WSVi = (1.0 - TEXTRANK_DAMPING_FACTOR) + TEXTRANK_DAMPING_FACTOR * resultSumVji;
      distStats.addValue(Math.abs(sentence_i.rank - WSVi));
      sentence_i.rank = WSVi;
    }

    // Calculate the Standard Error of the Mean.
    final double standard_error =
        distStats.getStandardDeviation() / Math.sqrt((double) distStats.getN());

    // If the std error of the mean is less than the threshold,
    // the graph has converged and we break.
    if (standard_error < STANDARD_ERROR_THRESHOLD) {
      break;
    }
  }
}