public AssociativeArray2D getWordProbabilitiesPerTopic() {
    AssociativeArray2D ptw = new AssociativeArray2D();

    ModelParameters modelParameters = knowledgeBase.getModelParameters();
    TrainingParameters trainingParameters = knowledgeBase.getTrainingParameters();

    // initialize a probability list for every topic
    int k = trainingParameters.getK();
    for (Integer topicId = 0; topicId < k; ++topicId) {
      ptw.put(topicId, new AssociativeArray());
    }

    int d = modelParameters.getD();
    double beta = trainingParameters.getBeta();

    Map<List<Object>, Integer> topicWordCounts = modelParameters.getTopicWordCounts();
    Map<Integer, Integer> topicCounts = modelParameters.getTopicCounts();
    for (Map.Entry<List<Object>, Integer> entry : topicWordCounts.entrySet()) {
      List<Object> tpk = entry.getKey();
      Integer topicId = (Integer) tpk.get(0);
      Object word = tpk.get(1);
      Integer njw = entry.getValue();

      Integer nj = topicCounts.get(topicId);

      double probability = (njw + beta) / (nj + beta * d);

      ptw.get(topicId).put(word, probability);
    }

    for (Integer topicId = 0; topicId < k; ++topicId) {
      ptw.put(topicId, MapFunctions.sortAssociativeArrayByValueDescending(ptw.get(topicId)));
    }

    return ptw;
  }
Esempio n. 2
0
  /**
   * Calculates the p-value of null Hypothesis.
   *
   * @param transposeDataCollection
   * @return
   * @throws IllegalArgumentException
   */
  public static double getPvalue(TransposeDataCollection transposeDataCollection)
      throws IllegalArgumentException {
    if (transposeDataCollection.size() != 2) {
      throw new IllegalArgumentException();
    }

    Object[] keys = transposeDataCollection.keySet().toArray();

    // counter of uncencored internalData in each group
    Map<Object, Integer> n = new HashMap<>();
    n.put(keys[0], 0);
    n.put(keys[1], 0);

    Queue<Double> censoredData = new PriorityQueue<>();
    Queue<Double> uncensoredData = new PriorityQueue<>();
    for (Map.Entry<Object, FlatDataCollection> entry : transposeDataCollection.entrySet()) {
      Object j = entry.getKey();
      FlatDataCollection flatDataCollection = entry.getValue();

      for (Object value : flatDataCollection) {
        String str = value.toString();
        if (str.endsWith(CensoredDescriptives.CENSORED_NUMBER_POSTFIX)) {
          // censored internalData encoded as 4.3+ or -4.3+
          censoredData.add(
              Double.valueOf(
                  str.substring(
                      0,
                      str.length()
                          - CensoredDescriptives.CENSORED_NUMBER_POSTFIX
                              .length()))); // remove the trailing char and convert it to double
        } else {
          // uncensored internalData
          uncensoredData.add(TypeInference.toDouble(value)); // convert it to double
        }
        n.put(j, n.get(j) + 1);
      }
    }

    Double currentCensored = null;
    Double currentUncensored = null;
    AssociativeArray2D testTable = new AssociativeArray2D();

    do {
      if (currentCensored == null) {
        currentCensored = censoredData.poll();
      }
      if (currentUncensored == null) {
        currentUncensored = uncensoredData.poll();
      }

      Double ti;
      String key;
      if (currentUncensored == null) {
        key = currentCensored.toString().concat((CensoredDescriptives.CENSORED_NUMBER_POSTFIX));
        ti = currentCensored;
        currentCensored = null;
      } else if (currentCensored == null) {
        key = currentUncensored.toString();
        ti = currentUncensored;
        currentUncensored = null;
      } else if (currentCensored
          < currentUncensored) { // NOT EQUAL! Uncensored internalData of the same value are always
                                 // larger
        key = currentCensored.toString().concat(CensoredDescriptives.CENSORED_NUMBER_POSTFIX);
        ti = currentCensored;
        currentCensored = null;
      } else {
        key = currentUncensored.toString();
        ti = currentUncensored;
        currentUncensored = null;
      }

      Object value = testTable.get2d(key, "mi");
      if (value == null) {
        testTable.put2d(key, "mi", 1);
        testTable.put2d(key, "rti", 0);
      } else {
        testTable.put2d(key, "mi", ((Integer) value) + 1);
        continue; // continue in order not to count twice the r*ti below
      }

      for (Map.Entry<Object, FlatDataCollection> entry : transposeDataCollection.entrySet()) {
        Object j = entry.getKey();
        FlatDataCollection flatDataCollection = entry.getValue();

        for (Object value2 : flatDataCollection) {
          double v;
          String str = value2.toString();
          if (str.endsWith(CensoredDescriptives.CENSORED_NUMBER_POSTFIX)) {
            // censored internalData encoded as 4.3+ or -4.3+
            v =
                Double.valueOf(
                    str.substring(
                        0,
                        str.length()
                            - CensoredDescriptives.CENSORED_NUMBER_POSTFIX
                                .length())); // remove the trailing char and convert it to double
          } else {
            // uncensored internalData
            v = TypeInference.toDouble(value2); // convert it to double
          }

          if (v >= ti) {
            testTable.put2d(key, "rti", (Integer) testTable.get2d(key, "rti") + 1);
          }
        }
      }

    } while (currentCensored != null
        || currentUncensored != null
        || !censoredData.isEmpty()
        || !uncensoredData.isEmpty());

    censoredData = null;
    uncensoredData = null;

    double VarS = 0.0;

    Object previousUncencoredKey = null;
    for (Map.Entry<Object, AssociativeArray> entry : testTable.entrySet()) {
      Object ti = entry.getKey();
      AssociativeArray testRow = entry.getValue();

      double previousUncencoredValue = 0;

      Object tmp = testTable.get2d(previousUncencoredKey, "eti");
      if (tmp != null) {
        previousUncencoredValue = TypeInference.toDouble(tmp);
      }

      if (!ti.toString().endsWith(CensoredDescriptives.CENSORED_NUMBER_POSTFIX)) { // uncensored
        double mi = testRow.getDouble("mi");
        double rti = testRow.getDouble("rti");
        double eti = previousUncencoredValue + mi / rti;

        testRow.put("eti", eti);
        testRow.put("wi", 1 - eti);
        previousUncencoredKey = ti;
      } else { // censored
        testRow.put("wi", -previousUncencoredValue);
      }

      double wi = testRow.getDouble("wi");
      VarS += testRow.getDouble("mi") * wi * wi;
    }

    double S = 0.0;
    for (Object value : transposeDataCollection.get(keys[0])) { // if ti belongs to the first group
      Object
          key; // we must first convert the number into to double and then append the + if
               // necessary. This is why it's converted like this.
      String str = value.toString();
      if (str.endsWith(CensoredDescriptives.CENSORED_NUMBER_POSTFIX)) {
        // censored internalData encoded as 4.3+ or -4.3+
        Double v =
            Double.valueOf(
                str.substring(
                    0,
                    str.length()
                        - CensoredDescriptives.CENSORED_NUMBER_POSTFIX
                            .length())); // remove the trailing char and convert it to double
        key = v.toString() + CensoredDescriptives.CENSORED_NUMBER_POSTFIX;
      } else {
        // uncensored internalData
        Double v = TypeInference.toDouble(value); // convert it to double
        key = v.toString();
      }
      double wi = TypeInference.toDouble(testTable.get2d(key, "wi"));
      S += wi;
    }
    testTable = null;

    double n0 = n.get(keys[0]).doubleValue();
    double n1 = n.get(keys[1]).doubleValue();

    VarS *= n0 * n1 / ((n0 + n1) * (n0 + n1 - 1.0));

    double Z = S / Math.sqrt(VarS);

    double pvalue = scoreToPvalue(Z);

    return pvalue;
  }