/** * Computes the empirical distribution using data read from a URL. * * @param url url of the input file * @throws IOException if an IO error occurs */ public void load(URL url) throws IOException { BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream())); try { DataAdapter da = new StreamDataAdapter(in); try { da.computeStats(); } catch (IOException ioe) { // don't wrap exceptions which are already IOException throw ioe; } catch (RuntimeException rte) { // don't wrap RuntimeExceptions throw rte; } catch (Exception e) { throw MathRuntimeException.createIOException(e); } if (sampleStats.getN() == 0) { throw MathRuntimeException.createEOFException("URL {0} contains no data", url); } in = new BufferedReader(new InputStreamReader(url.openStream())); fillBinStats(in); loaded = true; } finally { try { in.close(); } catch (IOException ex) { // ignore } } }
/** * Fills binStats array (second pass through data file). * * @param in object providing access to the data * @throws IOException if an IO error occurs */ private void fillBinStats(Object in) throws IOException { // Load array of bin upper bounds -- evenly spaced from min - max double min = sampleStats.getMin(); double max = sampleStats.getMax(); double delta = (max - min) / (Double.valueOf(binCount)).doubleValue(); double[] binUpperBounds = new double[binCount]; binUpperBounds[0] = min + delta; for (int i = 1; i < binCount - 1; i++) { binUpperBounds[i] = binUpperBounds[i - 1] + delta; } binUpperBounds[binCount - 1] = max; // Initialize binStats ArrayList if (!binStats.isEmpty()) { binStats.clear(); } for (int i = 0; i < binCount; i++) { SummaryStatistics stats = new SummaryStatistics(); binStats.add(i, stats); } // Filling data in binStats Array DataAdapterFactory aFactory = new DataAdapterFactory(); DataAdapter da = aFactory.getAdapter(in); try { da.computeBinStats(min, delta); } catch (IOException ioe) { // don't wrap exceptions which are already IOException throw ioe; } catch (RuntimeException rte) { // don't wrap RuntimeExceptions throw rte; } catch (Exception e) { throw MathRuntimeException.createIOException(e); } // Assign upperBounds based on bin counts upperBounds = new double[binCount]; upperBounds[0] = ((double) binStats.get(0).getN()) / (double) sampleStats.getN(); for (int i = 1; i < binCount - 1; i++) { upperBounds[i] = upperBounds[i - 1] + ((double) binStats.get(i).getN()) / (double) sampleStats.getN(); } upperBounds[binCount - 1] = 1.0d; }
/** * Calculates and returns the arrival rate of the traces in piList * * @return double */ public double getArrivalRate() { double arrivalRate = 0; if (arrivalStats.getN() > 0 && arrivalStats.getMean() != 0) { // mean arrivalRate is 1 divided by the mean of the inter-arrival // times arrivalRate = 1 / arrivalStats.getMean(); } return arrivalRate; }
/** Iterate through the graph, calculating rank. */ protected void iterateGraph(final int max_iterations) { final double[] rankList = new double[nodeList.length]; // either run through N iterations, or until the standard // error converges below a threshold for (int k = 0; k < max_iterations; k++) { distStats.clear(); // calculate the next rank for each node for (int i = 0; i < nodeList.length; i++) { final Node n1 = nodeList[i]; double rank = 0.0D; for (Node n2 : n1.edges) { rank += n2.rank / (double) n2.edges.size(); } rank *= TEXTRANK_DAMPING_FACTOR; rank += 1.0D - TEXTRANK_DAMPING_FACTOR; rankList[i] = rank; distStats.addValue(Math.abs(n1.rank - rank)); // System.out.println("node : " + n1.key + " rank : " + Math.abs((n1.rank - rank))); } final double standard_error = distStats.getStandardDeviation() / Math.sqrt((double) distStats.getN()); // swap in new rank values for (int i = 0; i < nodeList.length; i++) { nodeList[i].rank = rankList[i]; } if (standard_error < STANDARD_ERROR_THRESHOLD) { break; } } }
/** * Generates a random value from this distribution. * * @return the random value. * @throws IllegalStateException if the distribution has not been loaded */ public double getNextValue() throws IllegalStateException { if (!loaded) { throw MathRuntimeException.createIllegalStateException("distribution not loaded"); } // Start with a uniformly distributed random number in (0,1) double x = Math.random(); // Use this to select the bin and generate a Gaussian within the bin for (int i = 0; i < binCount; i++) { if (x <= upperBounds[i]) { SummaryStatistics stats = binStats.get(i); if (stats.getN() > 0) { if (stats.getStandardDeviation() > 0) { // more than one obs return randomData.nextGaussian(stats.getMean(), stats.getStandardDeviation()); } else { return stats.getMean(); // only one obs in bin } } } } throw new MathRuntimeException("no bin selected"); }
/** * Calculates the rank weighting scores for all the nodes in the graph. Iterative calculates over * the graph until convergence at the standard error threshold or until max iterations. * * @param maxIterations Max number of iterations allowed for calculating rank scores * @param language Language of the text to calculate rank weighting scores for Available * languages: Icelandic and English */ public void weigthingScore(int maxIterations, Language language) { LinkedList<Node> nodes = new LinkedList<Node>(); // Add nodes to LinkedList, we need them to stay in order for (int i = 0; i < nodeList.length; i++) { nodes.add(nodeList[i]); } /* WS(Vi) = ( 1 - d) + d * Sum(VjIn) ________Wij________ * WS(Vj) Sum(Vk outVj) Wjk */ for (int k = 0; k < maxIterations; k++) { distStats.clear(); // Use dynamic programming to calculate the scores double previousWSScore[] = new double[nodes.size()]; // Read in scores already calculated for nodes for (Node s : nodeList) { previousWSScore[nodes.indexOf(s)] = s.rank; } // For all nodes in the graph for (Node sentence_i : nodes) { double resultSumVji = 0; // For all in-coming edges of Vi for (Node sentence_j : sentence_i.edgesIN) { // Do not compare a sentence to it self, we do not allow self voting here if (!sentence_j.value.text.equalsIgnoreCase(sentence_i.value.text)) { // Calculate the sum of all similarity measurements // from all Vj nodes with outgoing edges to Vk nodes, see Wjk in equation double sumWjk = getSumWjk(sentence_j, language); if (sumWjk != 0) { double Wji = 0.0; if (language.equals(Language.ICELANDIC)) { // Calculate Wij, similarity between two sentences Wji = sentence_i.similarity(sentence_j); } else if (language.equals(Language.ENGLISH)) { // Calculate Wij, similarity between two sentences Wji = sentence_i.similarityEN(sentence_j); } // Get the score for the previous node double WSVj = previousWSScore[nodes.indexOf(sentence_j)]; // Sum all (j in Vj) resultSumVji += ((Wji / sumWjk) * WSVj); } } } // Calculate weighting score WS(Vi) double WSVi = (1.0 - TEXTRANK_DAMPING_FACTOR) + TEXTRANK_DAMPING_FACTOR * resultSumVji; distStats.addValue(Math.abs(sentence_i.rank - WSVi)); sentence_i.rank = WSVi; } // Calculate the Standard Error of the Mean final double standard_error = distStats.getStandardDeviation() / Math.sqrt((double) distStats.getN()); // if std error of the mean is less than threshold // the graph has converged and we break if (standard_error < STANDARD_ERROR_THRESHOLD) { break; } } }