/**
  * Computes the empirical distribution using data read from a URL.
  *
  * <p>The URL is read twice: the first pass computes the overall sample statistics, the second
  * pass fills the per-bin statistics. Each pass opens its own stream and that stream is closed
  * before the method moves on, whether or not the pass succeeded. Characters are decoded with
  * the platform default charset (single-argument {@code InputStreamReader} constructor).
  *
  * <p>If the first pass leaves {@code sampleStats} empty, the exception built by
  * {@code MathRuntimeException.createEOFException} is thrown and the distribution is not marked
  * as loaded.
  *
  * @param url url of the input file
  * @throws IOException if an IO error occurs
  */
 public void load(URL url) throws IOException {
   // First pass: overall sample statistics.
   BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream()));
   try {
     DataAdapter da = new StreamDataAdapter(in);
     try {
       da.computeStats();
     } catch (IOException ioe) {
       // don't wrap exceptions which are already IOException
       throw ioe;
     } catch (RuntimeException rte) {
       // don't wrap RuntimeExceptions
       throw rte;
     } catch (Exception e) {
       throw MathRuntimeException.createIOException(e);
     }
   } finally {
     // Release the first stream here; it used to be overwritten below and never closed.
     try {
       in.close();
     } catch (IOException ignored) {
       // best-effort close: a failure here must not mask the outcome of the pass
     }
   }

   if (sampleStats.getN() == 0) {
     throw MathRuntimeException.createEOFException("URL {0} contains no data", url);
   }

   // Second pass: the data has to be re-read from the start, so open a fresh stream.
   in = new BufferedReader(new InputStreamReader(url.openStream()));
   try {
     fillBinStats(in);
     loaded = true;
   } finally {
     try {
       in.close();
     } catch (IOException ignored) {
       // best-effort close: a failure here must not mask the outcome of the pass
     }
   }
 }
  /**
   * Fills binStats array (second pass through data file).
   *
   * <p>Resets {@code binStats} to {@code binCount} empty accumulators, lets the adapter obtained
   * for {@code in} distribute the data into them, and then rebuilds {@code upperBounds} as the
   * running fraction of observations up to and including each bin. The last entry is pinned to
   * exactly 1.0 rather than summed.
   *
   * @param in object providing access to the data
   * @throws IOException if an IO error occurs
   */
  private void fillBinStats(Object in) throws IOException {
    // Bins are evenly spaced between the sample minimum and maximum.
    double min = sampleStats.getMin();
    double max = sampleStats.getMax();
    double delta = (max - min) / (double) binCount;

    // Start from binCount fresh, empty accumulators.
    binStats.clear();
    for (int i = 0; i < binCount; i++) {
      binStats.add(new SummaryStatistics());
    }

    // Let the adapter matching the input type route every value into its bin.
    DataAdapterFactory aFactory = new DataAdapterFactory();
    DataAdapter da = aFactory.getAdapter(in);
    try {
      da.computeBinStats(min, delta);
    } catch (IOException ioe) {
      // don't wrap exceptions which are already IOException
      throw ioe;
    } catch (RuntimeException rte) {
      // don't wrap RuntimeExceptions
      throw rte;
    } catch (Exception e) {
      throw MathRuntimeException.createIOException(e);
    }

    // Assign upperBounds based on bin counts (cumulative relative frequencies).
    final double sampleSize = (double) sampleStats.getN();
    upperBounds = new double[binCount];
    upperBounds[0] = ((double) binStats.get(0).getN()) / sampleSize;
    for (int i = 1; i < binCount - 1; i++) {
      upperBounds[i] = upperBounds[i - 1] + ((double) binStats.get(i).getN()) / sampleSize;
    }
    // Set the final bound directly so accumulated rounding error cannot leave it below 1.
    upperBounds[binCount - 1] = 1.0d;
  }
 /**
  * Returns the mean arrival rate derived from {@code arrivalStats}.
  *
  * <p>The rate is the reciprocal of the mean held by {@code arrivalStats} (the mean of the
  * inter-arrival times).
  *
  * @return {@code 1 / mean}, or 0 when {@code arrivalStats} is empty or its mean is exactly 0
  */
 public double getArrivalRate() {
   // Only invert the mean when there is data and the division is well defined.
   final boolean meanIsUsable = arrivalStats.getN() > 0 && arrivalStats.getMean() != 0;
   return meanIsUsable ? 1 / arrivalStats.getMean() : 0;
 }
// Example #4
// 0
  /**
   * Iterates over the graph, recomputing the rank of every node.
   *
   * <p>Each pass computes all new ranks from the ranks of the previous pass and only then writes
   * them back. Iteration stops after {@code max_iterations} passes, or earlier once the standard
   * error of the per-node absolute rank changes drops below {@code STANDARD_ERROR_THRESHOLD}.
   *
   * @param max_iterations upper limit on the number of passes
   */
  protected void iterateGraph(final int max_iterations) {
    // Scratch buffer so that a pass reads only ranks from the previous pass.
    final double[] pendingRanks = new double[nodeList.length];

    for (int pass = 0; pass < max_iterations; pass++) {
      distStats.clear();

      int slot = 0;
      for (final Node node : nodeList) {
        // Each neighbour contributes its rank split evenly across its own edges.
        double neighbourShare = 0.0D;
        for (Node neighbour : node.edges) {
          neighbourShare += neighbour.rank / (double) neighbour.edges.size();
        }

        final double updated =
            neighbourShare * TEXTRANK_DAMPING_FACTOR + (1.0D - TEXTRANK_DAMPING_FACTOR);

        pendingRanks[slot] = updated;
        distStats.addValue(Math.abs(node.rank - updated));
        slot++;
      }

      // Publish the ranks computed during this pass.
      for (int i = 0; i < nodeList.length; i++) {
        nodeList[i].rank = pendingRanks[i];
      }

      final double standardError =
          distStats.getStandardDeviation() / Math.sqrt((double) distStats.getN());
      if (standardError < STANDARD_ERROR_THRESHOLD) {
        // converged
        return;
      }
    }
  }
  /**
   * Generates a random value from this distribution.
   *
   * <p>A uniform deviate selects a bin through the cumulative {@code upperBounds}; the value is
   * then drawn from a Gaussian with that bin's mean and standard deviation, or is the bin mean
   * itself when the bin's standard deviation is not positive. A selected bin that holds no
   * observations is skipped in favour of the next one.
   *
   * <p>Both the uniform and the Gaussian deviate come from {@code randomData}, so reseeding that
   * generator makes the generated sequence reproducible.
   *
   * @return the random value.
   * @throws IllegalStateException if the distribution has not been loaded
   */
  public double getNextValue() throws IllegalStateException {

    if (!loaded) {
      throw MathRuntimeException.createIllegalStateException("distribution not loaded");
    }

    // Start with a uniformly distributed random number in (0,1), taken from the configured
    // generator rather than Math.random() so that the generator's seed governs the whole draw.
    double x = randomData.nextUniform(0, 1);

    // Use this to select the bin and generate a Gaussian within the bin
    for (int i = 0; i < binCount; i++) {
      if (x <= upperBounds[i]) {
        SummaryStatistics stats = binStats.get(i);
        if (stats.getN() > 0) {
          if (stats.getStandardDeviation() > 0) { // more than one obs
            return randomData.nextGaussian(stats.getMean(), stats.getStandardDeviation());
          } else {
            return stats.getMean(); // only one obs in bin
          }
        }
      }
    }
    // Reached only when no non-empty bin has an upper bound at or above x.
    throw new MathRuntimeException("no bin selected");
  }
// Example #6
// 0
  /**
   * Computes the rank (weighting score) of every node in the graph.
   *
   * <p>The scores are recomputed pass by pass until the standard error of the per-node score
   * changes falls below {@code STANDARD_ERROR_THRESHOLD}, or until {@code maxIterations} passes
   * have run. Within a pass each node's rank is overwritten as soon as it is computed, while the
   * votes it receives are weighted with the scores captured at the start of that pass.
   *
   * @param maxIterations Max number of iterations allowed for calculating rank scores
   * @param language Language of the text to calculate rank weighting scores for. Similarity is
   *     only computed for Icelandic and English; for any other value the edge weight stays 0
   */
  public void weigthingScore(int maxIterations, Language language) {

    // Mirror the node array into a list, in the same order, so positions can be looked up.
    LinkedList<Node> orderedNodes = new LinkedList<Node>();
    for (Node node : nodeList) {
      orderedNodes.add(node);
    }

    /*
     * Score computed for every node Vi:
     *
     *   WS(Vi) = (1 - d) + d * SUM over Vj in In(Vi) of ( Wji / SUM over Vk in Out(Vj) of Wjk ) * WS(Vj)
     *
     * where d is TEXTRANK_DAMPING_FACTOR and the inner sum is supplied by getSumWjk.
     */
    for (int pass = 0; pass < maxIterations; pass++) {
      distStats.clear();

      // Snapshot of the scores as they stand when this pass begins.
      double[] scoresAtPassStart = new double[orderedNodes.size()];
      for (Node node : nodeList) {
        scoresAtPassStart[orderedNodes.indexOf(node)] = node.rank;
      }

      for (Node target : orderedNodes) {
        double weightedVotes = 0;

        // Walk the in-coming edges of the target node.
        for (Node voter : target.edgesIN) {

          // No self voting: skip a voter whose text equals the target's (ignoring case).
          if (voter.value.text.equalsIgnoreCase(target.value.text)) {
            continue;
          }

          // Denominator of the formula: total weight over the voter's outgoing edges.
          double outgoingWeight = getSumWjk(voter, language);
          if (outgoingWeight == 0) {
            continue;
          }

          // Numerator: similarity between the two sentences, per language.
          double edgeWeight = 0.0;
          if (language.equals(Language.ICELANDIC)) {
            edgeWeight = target.similarity(voter);
          } else if (language.equals(Language.ENGLISH)) {
            edgeWeight = target.similarityEN(voter);
          }

          double voterScore = scoresAtPassStart[orderedNodes.indexOf(voter)];
          weightedVotes += ((edgeWeight / outgoingWeight) * voterScore);
        }

        double updatedScore =
            (1.0 - TEXTRANK_DAMPING_FACTOR) + TEXTRANK_DAMPING_FACTOR * weightedVotes;
        distStats.addValue(Math.abs(target.rank - updatedScore));
        target.rank = updatedScore;
      }

      // Standard error of the mean of this pass's score changes.
      final double standardError =
          distStats.getStandardDeviation() / Math.sqrt((double) distStats.getN());

      if (standardError < STANDARD_ERROR_THRESHOLD) {
        // below the threshold: the graph has converged
        return;
      }
    }
  }