예제 #1
0
파일: EDCoW.java 프로젝트: Lefteris008/EvS
 /**
  * Default constructor with the full set of parameters.
  *
  * @param delta1 Delta value (suggested 6). <br>
  *     It directly affects the number of events. Increasing this value, reduces the number of them
  *     and vice versa.
  * @param delta2 Delta2 value. <br>
  *     Prime divisors of the number of documents are required as values. It must be
  *     cross-referenced with the number of documents. More specifically, the outcome of the
  *     division between the number of documents and this metric should result the number of total
  *     windows.
  * @param gamma Gamma value (suggested 5). <br>
  *     It affects the quality of the uncovered events. Values greater than 15, seem to increase
  *     the number of the uncovered events.
  * @param minTermSupport Minimum term support value (suggested 0.0001). <br>
  *     Changing this value would result in altering the lower bound below which a term should not
  *     be included in the keywords list of an event.
  * @param maxTermSupport Maximum term support value (suggested 0.01). <br>
  *     Changing this value would result in altering the upper bound above which a term should not
  *     be included in the keywords list of an event.
  * @param timeSliceA Starting timeslice.
  * @param timeSliceB Ending timeslice.
  * @param corpus An EDCoWCorpus object.
  * @see #EDCoW(int, int, int, EDCoWCorpus) EDCoW() minimum constructor.
  */
 public EDCoW(
     int delta1,
     int delta2,
     int gamma,
     double minTermSupport,
     double maxTermSupport,
     int timeSliceA,
     int timeSliceB,
     EDCoWCorpus corpus) {
   this.delta = delta1;
   this.delta2 = delta2;
   this.gamma = gamma;
   this.minTermSupport = minTermSupport;
   this.maxTermSupport = maxTermSupport;
   this.timeSliceA = timeSliceA;
   this.timeSliceB = timeSliceB;
   this.countCorpus = 0;
   this.corpus = corpus;
   for (Integer numberOfDocument : corpus.getNumberOfDocuments()) {
     this.countCorpus += numberOfDocument;
   }
 }
예제 #2
0
파일: EDCoW.java 프로젝트: Lefteris008/EvS
  /**
   * Runs the algorithm on a single window and appends the events found there to {@code eventList}.
   *
   * <p>A window covers {@code delta2} consecutive time slices, from index {@code window * delta2}
   * to index {@code window * delta2 + delta2 - 1} inclusive. The method:
   *
   * <ol>
   *   <li>copies the window's document counts and each term's frequencies into {@code double}
   *       arrays and creates one {@code EDCoWKeyword} per entry of {@code termDocMap};
   *   <li>computes theta1 from the keywords' auto-correlations and keeps only the keywords whose
   *       auto-correlation is strictly greater than theta1;
   *   <li>computes the cross-correlation of every kept keyword against the kept list, computes
   *       theta2 from the resulting matrix and zeroes every entry above the diagonal that is
   *       smaller than theta2;
   *   <li>runs {@code EDCoWModularityDetection} on the kept keywords and the matrix, prints the
   *       size of each filtered community to standard output, saves an event for each of them and
   *       adds the resulting events to {@code eventList}.
   * </ol>
   *
   * @param window The window index (0, 1, 2 etc).
   * @throws java.lang.Exception General Exception.
   */
  public void processWindow(int window) throws Exception {
    int startSlice = window * delta2;
    // Inclusive index of the last slice that belongs to this window.
    int endSlice = startSlice + delta2 - 1;

    // Document counts of all delta2 slices of the window, last slice included.
    double[] distributiond =
        sliceWindowAsDoubles(corpus.getNumberOfDocuments(), startSlice, delta2);

    LinkedList<EDCoWKeyword> keyWords = new LinkedList<>();
    termDocMap
        .entrySet()
        .forEach(
            (entry) -> {
              double[] frequencyd = sliceWindowAsDoubles(entry.getValue(), startSlice, delta2);
              keyWords.add(new EDCoWKeyword(entry.getKey(), frequencyd, delta, distributiond));
            });

    // Iterate instead of calling LinkedList.get(i), which walks the list on every call.
    double[] autoCorrelationValues = new double[keyWords.size()];
    int position = 0;
    for (EDCoWKeyword keyword : keyWords) {
      autoCorrelationValues[position++] = keyword.getAutoCorrelation();
    }
    EDCoWThreshold th1 = new EDCoWThreshold();
    double theta1 = th1.theta1(autoCorrelationValues, gamma);

    // Removing trivial keywords based on theta1
    LinkedList<EDCoWKeyword> keyWordsList1 = new LinkedList<>();
    for (EDCoWKeyword keyword : keyWords) {
      if (keyword.getAutoCorrelation() > theta1) {
        keyWordsList1.add(keyword);
      }
    }

    for (EDCoWKeyword keyword : keyWordsList1) {
      keyword.computeCrossCorrelation(keyWordsList1);
    }

    // Each row is the array handed back by the keyword itself, so only the outer array is
    // allocated here.
    int keywordCount = keyWordsList1.size();
    double[][] bigMatrix = new double[keywordCount][];
    int row = 0;
    for (EDCoWKeyword keyword : keyWordsList1) {
      bigMatrix[row++] = keyword.getCrossCorrelation();
    }

    // Compute theta2 using the BigMatrix
    double theta2 = th1.theta2(bigMatrix, gamma);
    // Only the entries above the diagonal are thresholded.
    for (int i = 0; i < keywordCount; i++) {
      for (int j = i + 1; j < keywordCount; j++) {
        if (bigMatrix[i][j] < theta2) {
          bigMatrix[i][j] = 0;
        }
      }
    }
    EDCoWModularityDetection modularity =
        new EDCoWModularityDetection(keyWordsList1, bigMatrix, startSlice, endSlice);

    double thresholdE = 0.1;
    ArrayList<Community> finalArrCom = modularity.getCommunitiesFiltered(thresholdE);
    for (Community community : finalArrCom) {
      System.out.println(community.getCommunitySize());
      modularity.saveEventFromCommunity(community);
    }
    eventList.addAll(modularity.getEvents());
  }

  /**
   * Copies {@code length} consecutive values of {@code perSlice}, beginning at index {@code
   * startSlice}, into a new {@code double} array.
   *
   * @param perSlice Source values, one per time slice.
   * @param startSlice Index of the first value to copy.
   * @param length Number of values to copy.
   * @return A new array of size {@code length} holding the copied values.
   */
  private static double[] sliceWindowAsDoubles(Integer[] perSlice, int startSlice, int length) {
    double[] windowValues = new double[length];
    for (int offset = 0; offset < length; offset++) {
      windowValues[offset] = perSlice[startSlice + offset];
    }
    return windowValues;
  }