Exemplo n.º 1
0
 /**
  * Default constructor with the full set of parameters.
  *
  * @param delta1 Delta value (suggested 6). <br>
  *     It directly affects the number of events. Increasing this value, reduces the number of them
  *     and vice versa.
  * @param delta2 Delta2 value. <br>
  *     Prime divisors of the number of documents are required as values. It must be
  *     cross-referenced with the number of documents. More specifically, the outcome of the
  *     division between the number of documents and this metric should result the number of total
  *     windows.
  * @param gamma Gamma value (suggested 5). <br>
  *     It affects the quality of the uncovered events. Values greater than 15, seem to increase
  *     the number of the uncovered events.
  * @param minTermSupport Minimum term support value (suggested 0.0001). <br>
  *     Changing this value would result in altering the lower bound below which a term should not
  *     be included in the keywords list of an event.
  * @param maxTermSupport Maximum term support value (suggested 0.01). <br>
  *     Changing this value would result in altering the upper bound above which a term should not
  *     be included in the keywords list of an event.
  * @param timeSliceA Starting timeslice.
  * @param timeSliceB Ending timeslice.
  * @param corpus An EDCoWCorpus object.
  * @see #EDCoW(int, int, int, EDCoWCorpus) EDCoW() minimum constructor.
  */
 public EDCoW(
     int delta1,
     int delta2,
     int gamma,
     double minTermSupport,
     double maxTermSupport,
     int timeSliceA,
     int timeSliceB,
     EDCoWCorpus corpus) {
   this.delta = delta1;
   this.delta2 = delta2;
   this.gamma = gamma;
   this.minTermSupport = minTermSupport;
   this.maxTermSupport = maxTermSupport;
   this.timeSliceA = timeSliceA;
   this.timeSliceB = timeSliceB;
   this.countCorpus = 0;
   this.corpus = corpus;
   for (Integer numberOfDocument : corpus.getNumberOfDocuments()) {
     this.countCorpus += numberOfDocument;
   }
 }
Exemplo n.º 2
0
  /**
   * Runs the algorithm on a single window and appends the events found to {@code eventList}.
   *
   * <p>Processing steps:
   *
   * <ol>
   *   <li>Build one {@code EDCoWKeyword} per entry of {@code termDocMap} from the term frequencies
   *       and the corpus document counts of the window.
   *   <li>Keep only the keywords whose auto-correlation is strictly greater than theta1.
   *   <li>Compute the cross-correlations of the remaining keywords and collect them as rows of a
   *       matrix.
   *   <li>Zero every entry above the diagonal that is smaller than theta2.
   *   <li>Run modularity detection, print the size of every community returned by {@code
   *       getCommunitiesFiltered(0.1)}, and save each of them as an event.
   * </ol>
   *
   * @param window The window index (0, 1, 2 etc).
   * @throws java.lang.Exception General Exception.
   */
  public void processWindow(int window) throws Exception {
    Integer[] distributioni = corpus.getNumberOfDocuments();
    final double[] distributiond = new double[delta2];
    final int startSlice = window * delta2;
    final int endSlice = startSlice + delta2 - 1;

    // NOTE(review): both copy loops stop before endSlice (= startSlice + delta2 - 1), so the last
    // of the delta2 array entries is left at 0.0 -- confirm whether this is intentional.
    for (int i = startSlice; i < endSlice; i++) {
      distributiond[i - startSlice] = (double) distributioni[i];
    }

    // Random-access list: the keywords are read back by index right below.
    final ArrayList<EDCoWKeyword> keyWords = new ArrayList<>(termDocMap.size());
    termDocMap.forEach(
        (term, termFrequencies) -> {
          double[] frequencyd = new double[delta2];
          for (int i = startSlice; i < endSlice; i++) {
            frequencyd[i - startSlice] = (double) termFrequencies[i];
          }
          keyWords.add(new EDCoWKeyword(term, frequencyd, delta, distributiond));
        });

    double[] autoCorrelationValues = new double[keyWords.size()];
    for (int i = 0; i < autoCorrelationValues.length; i++) {
      autoCorrelationValues[i] = keyWords.get(i).getAutoCorrelation();
    }
    EDCoWThreshold th1 = new EDCoWThreshold();
    double theta1 = th1.theta1(autoCorrelationValues, gamma);

    // Removing trivial keywords based on theta1
    LinkedList<EDCoWKeyword> keyWordsList1 = new LinkedList<>();
    for (EDCoWKeyword keyword : keyWords) {
      if (keyword.getAutoCorrelation() > theta1) {
        keyWordsList1.add(keyword);
      }
    }

    // All cross-correlations are computed before any of them is read back.
    for (EDCoWKeyword keyword : keyWordsList1) {
      keyword.computeCrossCorrelation(keyWordsList1);
    }

    // Each row is the array handed out by the keyword itself, so only the outer array is
    // allocated here. Iterating (instead of get(i)) avoids a linear list walk per row.
    final int keywordCount = keyWordsList1.size();
    double[][] bigMatrix = new double[keywordCount][];
    int row = 0;
    for (EDCoWKeyword keyword : keyWordsList1) {
      bigMatrix[row++] = keyword.getCrossCorrelation();
    }

    // Compute theta2 using the BigMatrix
    double theta2 = th1.theta2(bigMatrix, gamma);

    // Only the entries above the diagonal are thresholded.
    for (int i = 0; i < keywordCount; i++) {
      for (int j = i + 1; j < keywordCount; j++) {
        if (bigMatrix[i][j] < theta2) {
          bigMatrix[i][j] = 0;
        }
      }
    }
    EDCoWModularityDetection modularity =
        new EDCoWModularityDetection(keyWordsList1, bigMatrix, startSlice, endSlice);

    double thresholdE = 0.1;
    ArrayList<Community> finalArrCom = modularity.getCommunitiesFiltered(thresholdE);
    for (Community community : finalArrCom) {
      System.out.println(community.getCommunitySize());
      modularity.saveEventFromCommunity(community);
    }
    eventList.addAll(modularity.getEvents());
  }
Exemplo n.º 3
0
  /**
   * Executes the complete detection run.
   *
   * <p>The method
   *
   * <ol>
   *   <li>stores in {@code termDocMap} every term longer than one character whose summed
   *       frequency lies strictly between {@code minTermSupport * countCorpus} and {@code
   *       maxTermSupport * countCorpus};
   *   <li>calls {@code processWindow(int)} for each of the {@code (timeSliceB - timeSliceA) /
   *       delta2} windows, logging (and not rethrowing) any exception raised by a window;
   *   <li>sorts the collected events and converts each one into an {@code EDCoWEvent} appended to
   *       {@code events.list};
   *   <li>stores the elapsed time, in seconds, in {@code executionTime} and prints it.
   * </ol>
   */
  @Override
  public void apply() {
    final long startTime = System.currentTimeMillis();

    // Absolute occurrence bounds: relative support multiplied by the corpus message count.
    final double minTermOccur = minTermSupport * countCorpus;
    final double maxTermOccur = maxTermSupport * countCorpus;

    final int windows = (timeSliceB - timeSliceA) / delta2;
    termDocMap = new HashMap<>();
    eventList = new LinkedList<>();

    PrintUtilities.printInfoMessageln("Calculating term frequencies...");
    List<String> terms = corpus.getTerms();
    for (int termIndex = 0; termIndex < terms.size(); termIndex++) {
      String term = terms.get(termIndex);
      // Single-character terms are skipped. Stopwords are not checked here because they are
      // already omitted when the dataset is created.
      if (term.length() <= 1) {
        continue;
      }
      Integer[] frequency = corpus.getDocumentsTermFrequency(termIndex);
      int totalFrequency = 0;
      for (int position = 0; position < frequency.length; position++) {
        totalFrequency += frequency[position];
      }
      boolean withinSupport = totalFrequency > minTermOccur && totalFrequency < maxTermOccur;
      if (withinSupport) {
        termDocMap.put(term, frequency);
      }
    }

    PrintUtilities.printInfoMessageln("Calculating windows...");
    for (int window = 0; window < windows; window++) {
      PrintUtilities.printInfoMessageln("Calculating window " + (window + 1) + "\n");
      try {
        processWindow(window);
      } catch (Exception ex) {
        // A failing window is logged and the remaining windows are still processed.
        Logger.getLogger(EDCoW.class.getName()).log(Level.SEVERE, null, ex);
      }
    }

    Collections.sort(eventList);
    events = new EDCoWEvents();

    eventList.forEach(
        detected -> {
          int firstSlice = (int) detected.startSlice;
          int lastSlice = (int) detected.endSlice - 1;
          events.list.add(
              new EDCoWEvent(
                  detected.getKeywordsIDsAsString(),
                  corpus.getDateFromTimeSlice(firstSlice)
                      + ","
                      + corpus.getDateFromTimeSlice(lastSlice),
                  corpus.getIDsOfWindowAsString(
                      corpus.getDateFromTimeSlice(firstSlice),
                      corpus.getDateFromTimeSlice(lastSlice))));
        });

    events.setFullList();

    final long endTime = System.currentTimeMillis();
    executionTime = (endTime - startTime) / 1000;
    // Index 1 of the stack trace is this method, so the call must stay directly in apply().
    PrintUtilities.printExecutionTime(
        startTime,
        endTime,
        EDCoW.class.getName(),
        Thread.currentThread().getStackTrace()[1].getMethodName());
  }