/**
 * Default constructor with the full set of parameters.
 *
 * @param delta1 Delta value (suggested 6). <br>
 *     It directly affects the number of events: increasing this value reduces their number and
 *     vice versa.
 * @param delta2 Delta2 value. <br>
 *     Prime divisors of the number of documents are required as values, so it must be
 *     cross-referenced with the number of documents. More specifically, dividing the number of
 *     documents by this value should yield the total number of windows.
 * @param gamma Gamma value (suggested 5). <br>
 *     It affects the quality of the uncovered events. Values greater than 15 seem to increase
 *     the number of uncovered events.
 * @param minTermSupport Minimum term support value (suggested 0.0001). <br>
 *     Changing this value alters the lower bound below which a term is not included in the
 *     keywords list of an event.
 * @param maxTermSupport Maximum term support value (suggested 0.01). <br>
 *     Changing this value alters the upper bound above which a term is not included in the
 *     keywords list of an event.
 * @param timeSliceA Starting timeslice.
 * @param timeSliceB Ending timeslice.
 * @param corpus An EDCoWCorpus object.
 * @see #EDCoW(int, int, int, EDCoWCorpus) EDCoW() minimum constructor.
 */
public EDCoW(
        int delta1,
        int delta2,
        int gamma,
        double minTermSupport,
        double maxTermSupport,
        int timeSliceA,
        int timeSliceB,
        EDCoWCorpus corpus) {
    this.delta = delta1;
    this.delta2 = delta2;
    this.gamma = gamma;
    this.minTermSupport = minTermSupport;
    this.maxTermSupport = maxTermSupport;
    this.timeSliceA = timeSliceA;
    this.timeSliceB = timeSliceB;
    this.countCorpus = 0;
    this.corpus = corpus;

    // Total message count of the corpus, used later to derive the term support bounds
    for (Integer numberOfDocument : corpus.getNumberOfDocuments()) {
        this.countCorpus += numberOfDocument;
    }
}
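/*
 * Usage sketch (an illustration, not part of the original class): constructing EDCoW with the
 * parameter values suggested in the Javadoc above. The corpus and the timeslice bounds are
 * assumptions; in practice delta2 and the timeslice range must be chosen so that
 * (timeSliceB - timeSliceA) / delta2 yields a whole number of windows.
 *
 *   EDCoWCorpus corpus = ...; // a corpus prepared elsewhere
 *   EDCoW edcow =
 *       new EDCoW(
 *           6,      // delta1, suggested value
 *           8,      // delta2, hypothetical; must evenly divide the timeslice range
 *           5,      // gamma, suggested value
 *           0.0001, // minimum term support, suggested value
 *           0.01,   // maximum term support, suggested value
 *           0,      // timeSliceA, hypothetical starting timeslice
 *           96,     // timeSliceB, hypothetical ending timeslice
 *           corpus);
 */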
/**
 * Method to run the algorithm and analyze terms and frequencies in a specific window.
 *
 * @param window The window index (0, 1, 2, etc.).
 * @throws java.lang.Exception General Exception.
 */
public void processWindow(int window) throws Exception {
    LinkedList<EDCoWKeyword> keyWords = new LinkedList<>();
    Integer[] distributioni = corpus.getNumberOfDocuments();
    double[] distributiond = new double[delta2];
    int startSlice = window * delta2;
    int endSlice = startSlice + delta2 - 1;
    for (int i = startSlice; i < endSlice; i++) {
        distributiond[i - startSlice] = (double) distributioni[i];
    }

    // Build the per-term signal of this window from the term/document frequencies
    termDocMap
        .entrySet()
        .stream()
        .forEach(
            (entry) -> {
                Integer[] frequencyf = entry.getValue();
                double[] frequencyd = new double[delta2];
                for (int i = startSlice; i < endSlice; i++) {
                    frequencyd[i - startSlice] = (double) frequencyf[i];
                }
                keyWords.add(new EDCoWKeyword(entry.getKey(), frequencyd, delta, distributiond));
            });

    double[] autoCorrelationValues = new double[keyWords.size()];
    for (int i = 0; i < keyWords.size(); i++) {
        autoCorrelationValues[i] = keyWords.get(i).getAutoCorrelation();
    }
    EDCoWThreshold th1 = new EDCoWThreshold();
    double theta1 = th1.theta1(autoCorrelationValues, gamma);

    // Removing trivial keywords based on theta1
    LinkedList<EDCoWKeyword> keyWordsList1 = new LinkedList<>();
    keyWords
        .stream()
        .filter((k) -> (k.getAutoCorrelation() > theta1))
        .forEach(
            (k) -> {
                keyWordsList1.add(k);
            });

    keyWordsList1
        .stream()
        .forEach(
            (kw1) -> {
                kw1.computeCrossCorrelation(keyWordsList1);
            });

    double[][] bigMatrix = new double[keyWordsList1.size()][keyWordsList1.size()];
    for (int i = 0; i < keyWordsList1.size(); i++) {
        bigMatrix[i] = keyWordsList1.get(i).getCrossCorrelation();
    }

    // Compute theta2 using the BigMatrix and discard weak cross-correlations
    double theta2 = th1.theta2(bigMatrix, gamma);
    for (int i = 0; i < keyWordsList1.size(); i++) {
        for (int j = i + 1; j < keyWordsList1.size(); j++) {
            bigMatrix[i][j] = (bigMatrix[i][j] < theta2) ? 0 : bigMatrix[i][j];
        }
    }

    // Detect communities of correlated keywords and keep those above the epsilon threshold
    EDCoWModularityDetection modularity =
        new EDCoWModularityDetection(keyWordsList1, bigMatrix, startSlice, endSlice);

    double thresholdE = 0.1;
    ArrayList<Community> finalArrCom = modularity.getCommunitiesFiltered(thresholdE);
    finalArrCom
        .stream()
        .map(
            (c) -> {
                System.out.println(c.getCommunitySize());
                return c;
            })
        .forEach(
            (c) -> {
                modularity.saveEventFromCommunity(c);
            });
    eventList.addAll(modularity.getEvents());
}
@Override
public void apply() {
    long startTime = System.currentTimeMillis();
    double minTermOccur = minTermSupport * countCorpus; // Min support * Message count corpus
    double maxTermOccur = maxTermSupport * countCorpus; // Max support * Message count corpus
    int windows = (timeSliceB - timeSliceA) / delta2;
    termDocMap = new HashMap<>();
    eventList = new LinkedList<>();

    PrintUtilities.printInfoMessageln("Calculating term frequencies...");
    List<String> terms = corpus.getTerms();
    for (int i = 0; i < terms.size(); i++) {
        String term = terms.get(i);
        if (term.length() > 1) {
            // Stopwords check removed as they are already omitted when creating the dataset
            Integer[] frequency = corpus.getDocumentsTermFrequency(i);
            int cf = 0;
            for (int freq : frequency) {
                cf += freq;
            }
            // Keep only terms whose collection frequency lies strictly between the support bounds
            if (cf > minTermOccur && cf < maxTermOccur) {
                termDocMap.put(term, frequency);
            }
        }
    }

    PrintUtilities.printInfoMessageln("Calculating windows...");
    for (int i = 0; i < windows; i++) {
        PrintUtilities.printInfoMessageln("Calculating window " + (i + 1) + "\n");
        try {
            processWindow(i);
        } catch (Exception ex) {
            Logger.getLogger(EDCoW.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    Collections.sort(eventList);

    // Convert the detected events into their final form, resolving timeslices to dates
    events = new EDCoWEvents();
    eventList
        .stream()
        .forEach(
            (event) -> {
                events.list.add(
                    new EDCoWEvent(
                        event.getKeywordsIDsAsString(),
                        corpus.getDateFromTimeSlice((int) event.startSlice)
                            + ","
                            + corpus.getDateFromTimeSlice((int) event.endSlice - 1),
                        corpus.getIDsOfWindowAsString(
                            corpus.getDateFromTimeSlice((int) event.startSlice),
                            corpus.getDateFromTimeSlice((int) event.endSlice - 1))));
            });
    events.setFullList();

    long endTime = System.currentTimeMillis();
    executionTime = (endTime - startTime) / 1000;
    PrintUtilities.printExecutionTime(
        startTime,
        endTime,
        EDCoW.class.getName(),
        Thread.currentThread().getStackTrace()[1].getMethodName());
}
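/*
 * Worked example (an illustration, not part of the original class): with the hypothetical
 * values timeSliceA = 0, timeSliceB = 96 and delta2 = 8 used in the sketch above, apply()
 * first filters the term list by the support bounds, then processes
 * (96 - 0) / 8 = 12 windows of delta2 = 8 timeslices each via processWindow(), and finally
 * assembles the sorted, date-resolved events:
 *
 *   edcow.apply();
 */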