/** Sort results to identify potential keywords. */ public void sortResults(final long max_results) { Arrays.sort( nodeList, new Comparator<Node>() { public int compare(Node n1, Node n2) { if (n1.rank > n2.rank) { return -1; } else if (n1.rank < n2.rank) { return 1; } else { return 0; } } }); // mark the top-ranked nodes distStats.clear(); for (int i = 0; i < nodeList.length; i++) { final Node n1 = nodeList[i]; if (i <= max_results) { n1.marked = true; distStats.addValue(n1.rank); } if (LOG.isDebugEnabled()) { LOG.debug("n: " + n1.key + " " + n1.rank + " " + n1.marked); for (Node n2 : n1.edges) { LOG.debug(" - " + n2.key); } } } }
/**
 * Computes binStats.
 *
 * <p>Routes every sample in {@code inputArray} to its bin and accumulates it
 * in that bin's statistics.
 *
 * @param min minimum value
 * @param delta grid size
 * @throws IOException if an IO error occurs
 */
@Override
public void computeBinStats(double min, double delta) throws IOException {
  for (final double sample : inputArray) {
    binStats.get(findBin(min, sample, delta)).addValue(sample);
  }
}
/**
 * Packs the aggregated query results into a fixed-size array:
 * [0] mean per-query result, [1] standard deviation, [2] variance.
 */
@Override
public double[] getResult() {
  final double meanResult = overallRes / overallQueries;
  return new double[] {meanResult, sumStats.getStandardDeviation(), sumStats.getVariance()};
}
/** * Calculates and returns the arrival rate of the traces in piList * * @return double */ public double getArrivalRate() { double arrivalRate = 0; if (arrivalStats.getN() > 0 && arrivalStats.getMean() != 0) { // mean arrivalRate is 1 divided by the mean of the inter-arrival // times arrivalRate = 1 / arrivalStats.getMean(); } return arrivalRate; }
public static void main(String[] args) { Ordering ordering = new Ordering(); ordering.prepare("ww3d", SequenceType.allen); SummaryStatistics ss = new SummaryStatistics(); for (int i = 0; i < 100; ++i) { double value = ordering.experiment(SequenceType.allen, true); // double value = ordering.orderingExperiment("ww3d", SequenceType.allen, true); System.out.println("..." + value); ss.addValue(value); } System.out.println("Summary " + ss.getMean() + " -- " + ss.getStandardDeviation()); }
/**
 * Computes binStats.
 *
 * <p>Reads one numeric value per line from {@code inputStream}, assigns each
 * value to a bin and accumulates it in that bin's statistics. The stream is
 * always closed, even when reading or parsing fails.
 *
 * @param min minimum value
 * @param delta grid size
 * @throws IOException if an IO error occurs
 */
@Override
public void computeBinStats(double min, double delta) throws IOException {
  try {
    String str = null;
    double val = 0.0d;
    while ((str = inputStream.readLine()) != null) {
      val = Double.parseDouble(str);
      SummaryStatistics stats = binStats.get(findBin(min, val, delta));
      stats.addValue(val);
    }
  } finally {
    // Previously the stream leaked whenever readLine() or parseDouble()
    // threw; the finally block guarantees it is released.
    inputStream.close();
    inputStream = null;
  }
}
/** * Computes the empirical distribution using data read from a URL. * * @param url url of the input file * @throws IOException if an IO error occurs */ public void load(URL url) throws IOException { BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream())); try { DataAdapter da = new StreamDataAdapter(in); try { da.computeStats(); } catch (IOException ioe) { // don't wrap exceptions which are already IOException throw ioe; } catch (RuntimeException rte) { // don't wrap RuntimeExceptions throw rte; } catch (Exception e) { throw MathRuntimeException.createIOException(e); } if (sampleStats.getN() == 0) { throw MathRuntimeException.createEOFException("URL {0} contains no data", url); } in = new BufferedReader(new InputStreamReader(url.openStream())); fillBinStats(in); loaded = true; } finally { try { in.close(); } catch (IOException ex) { // ignore } } }
/**
 * Smoke-tests for memory leakage: repeatedly creates and rolls back vertices
 * and transactions, sampling heap usage on the second run (r == 1, after
 * warm-up) and the final run, then asserts the samples stayed stable.
 */
@Test
public void testMemoryLeakage() {
  long memoryBaseline = 0;
  SummaryStatistics stats = new SummaryStatistics();
  int numRuns = 25;
  for (int r = 0; r < numRuns; r++) {
    // Sample memory before the workload, but only on run 1 (post-warm-up)
    // and the last run, so the stats compare "early" vs "late" heap use.
    if (r == 1 || r == (numRuns - 1)) {
      memoryBaseline = MemoryAssess.getMemoryUse();
      stats.addValue(memoryBaseline);
      // System.out.println("Memory before run "+(r+1)+": " + memoryBaseline / 1024 + " KB");
    }
    // Workload: 1000 rounds of vertex creation followed by rollback, both
    // directly on the graph and inside an explicit transaction. Nothing is
    // committed, so retained memory should not grow across runs.
    for (int t = 0; t < 1000; t++) {
      graph.addVertex(null);
      graph.rollback();
      TitanTransaction tx = graph.newTransaction();
      tx.addVertex();
      tx.rollback();
    }
    // Sample again after the workload on the same selected runs.
    if (r == 1 || r == (numRuns - 1)) {
      memoryBaseline = MemoryAssess.getMemoryUse();
      stats.addValue(memoryBaseline);
      // System.out.println("Memory after run " + (r + 1) + ": " + memoryBaseline / 1024 + " KB");
    }
    clopen();
  }
  System.out.println(
      "Average: " + stats.getMean() + " Std. Dev: " + stats.getStandardDeviation());
  // Heuristic leak check: the spread between samples must be smaller than the
  // smallest sample. NOTE(review): this is GC-timing sensitive and may flake
  // on some JVMs — confirm the threshold is adequate for CI.
  assertTrue(stats.getStandardDeviation() < stats.getMin());
}
/** * Fills binStats array (second pass through data file). * * @param in object providing access to the data * @throws IOException if an IO error occurs */ private void fillBinStats(Object in) throws IOException { // Load array of bin upper bounds -- evenly spaced from min - max double min = sampleStats.getMin(); double max = sampleStats.getMax(); double delta = (max - min) / (Double.valueOf(binCount)).doubleValue(); double[] binUpperBounds = new double[binCount]; binUpperBounds[0] = min + delta; for (int i = 1; i < binCount - 1; i++) { binUpperBounds[i] = binUpperBounds[i - 1] + delta; } binUpperBounds[binCount - 1] = max; // Initialize binStats ArrayList if (!binStats.isEmpty()) { binStats.clear(); } for (int i = 0; i < binCount; i++) { SummaryStatistics stats = new SummaryStatistics(); binStats.add(i, stats); } // Filling data in binStats Array DataAdapterFactory aFactory = new DataAdapterFactory(); DataAdapter da = aFactory.getAdapter(in); try { da.computeBinStats(min, delta); } catch (IOException ioe) { // don't wrap exceptions which are already IOException throw ioe; } catch (RuntimeException rte) { // don't wrap RuntimeExceptions throw rte; } catch (Exception e) { throw MathRuntimeException.createIOException(e); } // Assign upperBounds based on bin counts upperBounds = new double[binCount]; upperBounds[0] = ((double) binStats.get(0).getN()) / (double) sampleStats.getN(); for (int i = 1; i < binCount - 1; i++) { upperBounds[i] = upperBounds[i - 1] + ((double) binStats.get(i).getN()) / (double) sampleStats.getN(); } upperBounds[binCount - 1] = 1.0d; }
/**
 * Finalizes the current query: records its precision (fraction of mapped
 * results that were correct hits) and resets the per-query state.
 */
@Override
public void finishQuery(int qryN) {
  final double precision = (double) correctHits / (double) map.size();
  sumStats.addValue(precision);
  overallRes += precision;
  queryVal = precision;

  // Reset per-query accumulators for the next query.
  map = new HashMap<Integer, Integer>();
  correctHits = 0;
  overallQueries++;
}
/** * Calculates the average, min ad max throughput time out of the throughput times of all traces in * piList. Next to this, the arrival rate is calculated. All metrics are based on the process * instances in piList only * * @param piList ArrayList: the process instances used * @param fitOption int: the fit option used (how to deal with non-conformance) * @throws Exception */ public void calculateMetrics(ArrayList piList, int fitOption) throws Exception { properFrequency = 0; timeStats.clear(); arrivalStats.clear(); ArrayList arrivalDates = new ArrayList(); ListIterator lit = piList.listIterator(); while (lit.hasNext()) { ExtendedLogTrace currentTrace = (ExtendedLogTrace) lit.next(); if (currentTrace.hasProperlyTerminated() && currentTrace.hasSuccessfullyExecuted()) { properFrequency++; } try { long tp = (currentTrace.getEndDate().getTime() - currentTrace.getBeginDate().getTime()); if (fitOption == 0) { // timeStats based on all traces timeStats.addValue(tp); arrivalDates.add(currentTrace.getBeginDate()); } if (currentTrace.hasProperlyTerminated() && currentTrace.hasSuccessfullyExecuted()) { if (fitOption == 1) { // timeStats based on fitting traces only timeStats.addValue(tp); arrivalDates.add(currentTrace.getBeginDate()); } } } catch (NullPointerException ex) { ex.printStackTrace(); } } Date[] arrivals = (Date[]) arrivalDates.toArray(new Date[0]); // make sure arrivaldates are sorted Arrays.sort(arrivals); if (arrivals.length > 1) { for (int i = 1; i < arrivals.length; i++) { long t1 = arrivals[i].getTime(); long t2 = arrivals[i - 1].getTime(); long iat = arrivals[i].getTime() - arrivals[i - 1].getTime(); if (iat >= 0) { arrivalStats.addValue(iat); } } } }
/** Iterate through the graph, calculating rank. */ protected void iterateGraph(final int max_iterations) { final double[] rankList = new double[nodeList.length]; // either run through N iterations, or until the standard // error converges below a threshold for (int k = 0; k < max_iterations; k++) { distStats.clear(); // calculate the next rank for each node for (int i = 0; i < nodeList.length; i++) { final Node n1 = nodeList[i]; double rank = 0.0D; for (Node n2 : n1.edges) { rank += n2.rank / (double) n2.edges.size(); } rank *= TEXTRANK_DAMPING_FACTOR; rank += 1.0D - TEXTRANK_DAMPING_FACTOR; rankList[i] = rank; distStats.addValue(Math.abs(n1.rank - rank)); // System.out.println("node : " + n1.key + " rank : " + Math.abs((n1.rank - rank))); } final double standard_error = distStats.getStandardDeviation() / Math.sqrt((double) distStats.getN()); // swap in new rank values for (int i = 0; i < nodeList.length; i++) { nodeList[i].rank = rankList[i]; } if (standard_error < STANDARD_ERROR_THRESHOLD) { break; } } }
private GeoSegment getQQLineSegment() { SummaryStatistics stats = new SummaryStatistics(); for (int i = 0; i < sortedData.length; i++) { stats.addValue(sortedData[i]); } double sd = stats.getStandardDeviation(); double mean = stats.getMean(); double min = stats.getMin(); double max = stats.getMax(); // qq line: y = (1/sd)x - mean/sd GeoPoint startPoint = new GeoPoint(cons); startPoint.setCoords(min, (min / sd) - mean / sd, 1.0); GeoPoint endPoint = new GeoPoint(cons); endPoint.setCoords(max, (max / sd) - mean / sd, 1.0); GeoSegment seg = new GeoSegment(cons, startPoint, endPoint); seg.calcLength(); return seg; }
/** * Generates a random value from this distribution. * * @return the random value. * @throws IllegalStateException if the distribution has not been loaded */ public double getNextValue() throws IllegalStateException { if (!loaded) { throw MathRuntimeException.createIllegalStateException("distribution not loaded"); } // Start with a uniformly distributed random number in (0,1) double x = Math.random(); // Use this to select the bin and generate a Gaussian within the bin for (int i = 0; i < binCount; i++) { if (x <= upperBounds[i]) { SummaryStatistics stats = binStats.get(i); if (stats.getN() > 0) { if (stats.getStandardDeviation() > 0) { // more than one obs return randomData.nextGaussian(stats.getMean(), stats.getStandardDeviation()); } else { return stats.getMean(); // only one obs in bin } } } } throw new MathRuntimeException("no bin selected"); }
/**
 * Contains all the performance results obtained during log replay analysis. Can be used to retrieve
 * values for the performance metrics and to get extended visualizations.
 *
 * @see PerformanceMeasurer
 * @author Peter T.G. Hornix ([email protected])
 */
public class PerformanceLogReplayResult extends LogReplayAnalysisResult {

  // DescriptiveStatistics-object in which throughput times can be stored
  private DescriptiveStatistics timeStats = DescriptiveStatistics.newInstance();

  // SummaryStatistics to obtain mean inter arrival times
  private SummaryStatistics arrivalStats = SummaryStatistics.newInstance();

  // number of log traces that can be replayed normally
  private int properFrequency;

  public PerformanceLogReplayResult(
      AnalysisConfiguration analysisOptions,
      PetriNet net,
      LogReader log,
      LogReplayAnalysisMethod method) {
    // call the constructor of the superclass
    super(analysisOptions, net, log, method);
  }

  /**
   * Initializes the diagnostic data structures needed to store the measurements taken during the
   * log replay analysis.
   */
  protected void initDiagnosticDataStructures() {
    replayedLog = new ExtendedLogReader(inputLogReader);
    replayedPetriNet = new ExtendedPetriNet(inputPetriNet, replayedLog.getLogTraceIDs());
  }

  // ////////////////////////////METRICS-RELATED
  // METHODS///////////////////////////

  /**
   * Calculates the average, min ad max throughput time out of the throughput times of all traces in
   * piList. Next to this, the arrival rate is calculated. All metrics are based on the process
   * instances in piList only
   *
   * @param piList ArrayList: the process instances used
   * @param fitOption int: the fit option used (how to deal with non-conformance)
   * @throws Exception
   */
  public void calculateMetrics(ArrayList piList, int fitOption) throws Exception {
    properFrequency = 0;
    timeStats.clear();
    arrivalStats.clear();
    ArrayList arrivalDates = new ArrayList();
    ListIterator lit = piList.listIterator();
    while (lit.hasNext()) {
      ExtendedLogTrace currentTrace = (ExtendedLogTrace) lit.next();
      if (currentTrace.hasProperlyTerminated() && currentTrace.hasSuccessfullyExecuted()) {
        properFrequency++;
      }
      try {
        // throughput time = end date minus begin date (milliseconds)
        long tp = (currentTrace.getEndDate().getTime() - currentTrace.getBeginDate().getTime());
        if (fitOption == 0) {
          // timeStats based on all traces
          timeStats.addValue(tp);
          arrivalDates.add(currentTrace.getBeginDate());
        }
        if (currentTrace.hasProperlyTerminated() && currentTrace.hasSuccessfullyExecuted()) {
          if (fitOption == 1) {
            // timeStats based on fitting traces only
            timeStats.addValue(tp);
            arrivalDates.add(currentTrace.getBeginDate());
          }
        }
      } catch (NullPointerException ex) {
        // traces with missing begin/end dates are skipped (best-effort)
        ex.printStackTrace();
      }
    }
    Date[] arrivals = (Date[]) arrivalDates.toArray(new Date[0]);
    // make sure arrivaldates are sorted
    Arrays.sort(arrivals);
    if (arrivals.length > 1) {
      for (int i = 1; i < arrivals.length; i++) {
        // NOTE(review): t1 and t2 are unused; iat recomputes the same values.
        long t1 = arrivals[i].getTime();
        long t2 = arrivals[i - 1].getTime();
        long iat = arrivals[i].getTime() - arrivals[i - 1].getTime();
        if (iat >= 0) {
          arrivalStats.addValue(iat);
        }
      }
    }
  }

  /**
   * Exports the throughput times of all process instances in piList to a comma-seperated text-file.
   *
   * @param piList ArrayList: the process instances used
   * @param file File: the file to which the times are exported
   * @param divider long: the time divider used
   * @param sort String: the time sort used
   * @param fitOption int: the fit option used (how to deal with non-conformance)
   * @throws IOException
   */
  public void exportToFile(ArrayList piList, File file, long divider, String sort, int fitOption)
      throws IOException {
    Writer output = new BufferedWriter(new FileWriter(file));
    String line = "Log Trace,Throughput time (" + sort + ")\n";
    output.write(line);
    ListIterator lit = piList.listIterator();
    while (lit.hasNext()) {
      ExtendedLogTrace currentTrace = (ExtendedLogTrace) lit.next();
      try {
        double tp =
            (currentTrace.getEndDate().getTime() - currentTrace.getBeginDate().getTime())
                * 1.0
                / divider;
        if (fitOption == 0) {
          // times based on all traces
          line = currentTrace.getName() + "," + tp + "\n";
          // write line to the file
          output.write(line);
        }
        if (fitOption == 1
            && currentTrace.hasProperlyTerminated()
            && currentTrace.hasSuccessfullyExecuted()) {
          // times based on fitting traces only
          line = currentTrace.getName() + "," + tp + "\n";
          // write line to the file
          output.write(line);
        }
      } catch (NullPointerException npe) {
        // NOTE(review): NPE silently swallowed — traces without dates are
        // skipped without any logging; confirm this is intentional.
      }
    }
    // close the file
    output.close();
  }

  // ////////////////////////////GET
  // METHODS///////////////////////////////////////

  /**
   * Calculates and returns the stdev in throughput time out of the throughput times in timeStats.
   * (make sure calculateProcessMetrics() is called before this method).
   *
   * @return double
   */
  public double getStdevThroughputTime() {
    return timeStats.getStandardDeviation();
  }

  /**
   * Calculates the average of the (fastestpercentage) fast traces, the (slowestPercentage) slow
   * traces and the (100% - fastestPercentage - slowestPercentage) normal speed traces and returns
   * these averages in an array, where [0]: avg fast throughput time [1]: avg slow throughput time
   * [2]: avg middle throughput time
   *
   * @param fastestPercentage double: the percentage of measurements that is to be counted as fast
   * @param slowestPercentage double: the percentage of measurements that is to be counted as slow
   * @return double[]
   */
  public double[] getAverageTimes(double fastestPercentage, double slowestPercentage) {
    // initialize arrays
    double[] timeList = timeStats.getSortedValues();
    double[] avgTimes = new double[3];
    long total = 0;
    // obtain the number of fast , slow, normal traces
    int[] sizes = getSizes(fastestPercentage, slowestPercentage);
    int fastSize = sizes[0], slowSize = sizes[1], middleSize = sizes[2];
    for (int i = 0; i < fastSize; i++) {
      total += timeList[i];
    }
    // calculate average of the fastest traces
    double avgFastestTime = 0.0;
    if (fastSize != 0) {
      avgFastestTime = (total * 1.0) / fastSize;
    }
    // calculate average of the slowest traces
    int upperSize = timeList.length - slowSize;
    total = 0;
    for (int i = upperSize; i < timeList.length; i++) {
      total += timeList[i];
    }
    double avgSlowestTime = 0.0;
    if (slowSize > 0) {
      avgSlowestTime = (total * 1.0) / slowSize;
    }
    // calculate the middle/normal-speed traces
    total = 0;
    for (int i = fastSize; i < upperSize; i++) {
      total += timeList[i];
    }
    double avgMiddleTime = 0.0;
    if (middleSize > 0) {
      avgMiddleTime = (total * 1.0) / middleSize;
    }
    avgTimes[0] = avgFastestTime;
    avgTimes[1] = avgSlowestTime;
    avgTimes[2] = avgMiddleTime;
    return avgTimes;
  }

  /**
   * Returns an array containing the number of process instances that are considered to be fast,
   * i.e. have a low throughput time (place 0 in array), the number of process instances that are
   * slow (place 1 in array) and the number of process instances that are considered to be of normal
   * speed (place 2 in array). Based on fastestPercentage, slowestPercentage and timeList (thus
   * method calculateProcessMetrics() should be called before this one)
   *
   * @param fastestPercentage double: the percentage of measurements that is to be counted as fast
   * @param slowestPercentage double: the percentage of measurements that is to be counted as slow
   * @return int[]
   */
  public int[] getSizes(double fastestPercentage, double slowestPercentage) {
    int[] sizes = new int[3];
    String sizeString;
    int length = timeStats.getValues().length;
    // NOTE(review): rounding via a String round-trip (long -> String -> int);
    // a direct (int) cast of Math.round would avoid the parse.
    sizeString = Math.round((length * fastestPercentage) / 100.0) + "";
    sizes[0] = Integer.parseInt(sizeString);
    if (sizes[0] != length) {
      sizeString = Math.round((length * slowestPercentage) / 100.0) + "";
      sizes[1] = Integer.parseInt(sizeString);
      if ((sizes[0] + sizes[1]) > length) {
        // Make sure that sizes[0] + sizes[1] remains smaller than
        // the number of measurements in timeList (rounding could mess
        // this up)
        sizes[1] = length - sizes[0];
      }
    } else {
      sizes[1] = 0;
    }
    sizes[2] = length - sizes[0] - sizes[1];
    return sizes;
  }

  /**
   * Calculates and returns the arrival rate of the traces in piList
   *
   * @return double
   */
  public double getArrivalRate() {
    double arrivalRate = 0;
    if (arrivalStats.getN() > 0 && arrivalStats.getMean() != 0) {
      // mean arrivalRate is 1 divided by the mean of the inter-arrival
      // times
      arrivalRate = 1 / arrivalStats.getMean();
    }
    return arrivalRate;
  }

  /**
   * Returns the arrival Stats of the traces in piList
   *
   * @return SummaryStatistics
   */
  public SummaryStatistics getArrivalStats() {
    return arrivalStats;
  }

  /**
   * Returns the mean throughput time
   *
   * @return double
   */
  public double getMeanThroughputTime() {
    return timeStats.getMean();
  }

  /**
   * Returns the minimal throughput time. Note that method calculateProcessMetrics() should be
   * called before this method.
   *
   * @return double
   */
  public double getMinThroughputTime() {
    return timeStats.getMin();
  }

  /**
   * Returns the maximal throughput time
   *
   * @return double
   */
  public double getMaxThroughputTime() {
    return timeStats.getMax();
  }

  /**
   * returns the number of cases that execute successfully and complete properly
   *
   * @return int
   */
  public int getProperFrequency() {
    return (properFrequency);
  }

  // ////////////////////////////GRAPPA-RELATED
  // METHODS///////////////////////////

  /**
   * Creates a visualization of the performance analysis results. Note that a change of the display
   * state by the user will have no effect before calling this methods. This is intended to prevent
   * unnecessary cloning of the extended petri net, which actually delivers the custom visualization
   * of the performance analysis results.
   *
   * @param selectedInstances The process instances that have been selected for updating the
   *     visualization.
   * @return The visualization wrapped in a ModelGraphPanel.
   */
  public ModelGraphPanel getVisualization(ArrayList selectedInstances) {
    // sets the currentlySelectedInstances attribute, which is necessary
    // because
    // the writeToDot() method has a fixed interface, though the
    // visualization should
    // be able to take them into account
    ((ExtendedPetriNet) replayedPetriNet).currentlySelectedInstances = selectedInstances;
    ModelGraphPanel myResultVisualization;
    myResultVisualization = ((ExtendedPetriNet) replayedPetriNet).getGrappaVisualization();
    return myResultVisualization;
  }
}
/**
 * Calculates a rank threshold: the mean of the rank-change distribution plus
 * INCLUSIVE_COEFF standard deviations.
 */
public double getRankThreshold() {
  final double mean = distStats.getMean();
  final double spread = distStats.getStandardDeviation() * INCLUSIVE_COEFF;
  return mean + spread;
}
/**
 * Calculates the rank weighting scores for all the nodes in the graph. Iterative calculates over
 * the graph until convergence at the standard error threshold or until max iterations.
 *
 * @param maxIterations Max number of iterations allowed for calculating rank scores
 * @param language Language of the text to calculate rank weighting scores for Available
 *     languages: Icelandic and English
 */
public void weigthingScore(int maxIterations, Language language) {
  // NOTE(review): method name typo ("weigthing") kept as-is — renaming would
  // break existing callers.
  LinkedList<Node> nodes = new LinkedList<Node>();

  // Add nodes to LinkedList, we need them to stay in order
  for (int i = 0; i < nodeList.length; i++) {
    nodes.add(nodeList[i]);
  }

  /*
   * TextRank update rule:
   *   WS(Vi) = (1 - d) + d * Sum over Vj in In(Vi) of [ Wji / Sum over Vk in Out(Vj) of Wjk ] * WS(Vj)
   */
  for (int k = 0; k < maxIterations; k++) {
    distStats.clear();

    // Use dynamic programming to calculate the scores: snapshot of the
    // previous iteration's scores, indexed by each node's position in "nodes".
    double previousWSScore[] = new double[nodes.size()];

    // Read in scores already calculated for nodes
    for (Node s : nodeList) {
      previousWSScore[nodes.indexOf(s)] = s.rank;
    }

    // For all nodes in the graph
    for (Node sentence_i : nodes) {
      double resultSumVji = 0;

      // For all in-coming edges of Vi
      for (Node sentence_j : sentence_i.edgesIN) {
        // Do not compare a sentence to it self, we do not allow self voting here
        if (!sentence_j.value.text.equalsIgnoreCase(sentence_i.value.text)) {
          // Calculate the sum of all similarity measurements
          // from all Vj nodes with outgoing edges to Vk nodes, see Wjk in equation
          double sumWjk = getSumWjk(sentence_j, language);

          if (sumWjk != 0) {
            double Wji = 0.0;

            if (language.equals(Language.ICELANDIC)) {
              // Calculate Wij, similarity between two sentences
              Wji = sentence_i.similarity(sentence_j);
            } else if (language.equals(Language.ENGLISH)) {
              // Calculate Wij, similarity between two sentences
              Wji = sentence_i.similarityEN(sentence_j);
            }

            // Get the score for the previous node
            double WSVj = previousWSScore[nodes.indexOf(sentence_j)];

            // Sum all (j in Vj)
            resultSumVji += ((Wji / sumWjk) * WSVj);
          }
        }
      }
      // Calculate weighting score WS(Vi)
      double WSVi = (1.0 - TEXTRANK_DAMPING_FACTOR) + TEXTRANK_DAMPING_FACTOR * resultSumVji;
      // Record the magnitude of this node's score change for convergence.
      distStats.addValue(Math.abs(sentence_i.rank - WSVi));
      sentence_i.rank = WSVi;
    }

    // Calculate the Standard Error of the Mean
    final double standard_error =
        distStats.getStandardDeviation() / Math.sqrt((double) distStats.getN());

    // if std error of the mean is less than threshold
    // the graph has converged and we break
    if (standard_error < STANDARD_ERROR_THRESHOLD) {
      break;
    }
  }
}