Пример #1
0
    /**
     * Computes the (log-space) emission score of every topic for every sentence of document d,
     * storing the result in emission[sentence][topic].
     *
     * <p>For each sentence the score of topic k is the weighted sum of log word-topic
     * probabilities over the sentence's sparse features. For documents of source type 2
     * (newEgg), the first sentence's sentiment label restricts which half of the topic
     * range is allowed; the forbidden half is forced to Double.NEGATIVE_INFINITY so it can
     * never be selected downstream.
     *
     * @param d the document whose per-sentence emission scores are (re)computed
     */
    void ComputeEmissionProbsForDoc(_Doc d) {
      for (int i = 0; i < d.getSenetenceSize(); i++) {
        _Stn stn = d.getSentence(i);
        Arrays.fill(emission[i], 0);

        // By default all topics [0, number_of_topics) are eligible for this sentence.
        int start = 0, end = this.number_of_topics;
        if (i == 0 && d.getSourceType() == 2) { // first sentence is specially handled for newEgg
          // get the sentiment label of the first sentence
          int sentimentLabel = stn.getSentenceSenitmentLabel();
          if (sentimentLabel == 0) { // positive sentiment: restrict to the first half of topics
            end = this.number_of_topics / 2;
            for (int k = end; k < this.number_of_topics; k++)
              emission[i][k] = Double.NEGATIVE_INFINITY;
          } else if (sentimentLabel == 1) { // negative sentiment: restrict to the second half
            start = this.number_of_topics / 2;
            for (int k = 0; k < start; k++) emission[i][k] = Double.NEGATIVE_INFINITY;
          }
        }

        // Accumulate log-likelihood contributions of each word for the allowed topics only.
        for (int k = start; k < end; k++) {
          for (_SparseFeature w : stn.getFv()) {
            emission[i][k] +=
                w.getValue() * topic_term_probabilty[k][w.getIndex()]; // all in log-space
          }
        }
      }
    }
Пример #2
0
  /**
   * Merges per-section feature maps into one index-sorted sparse vector.
   *
   * <p>Each entry of {@code vcts} holds the features observed in one section (dimension) of a
   * document. Features sharing the same index are accumulated into a single {@code _SparseFeature}
   * whose total value is the sum over all sections, while the per-section value is recorded via
   * {@code setValue4Dim}. Null or empty sections are skipped.
   *
   * @param vcts one feature map per section; entries may be null/empty
   * @return the merged features, sorted by index
   */
  public static _SparseFeature[] createSpVct(ArrayList<HashMap<Integer, Double>> vcts) {
    HashMap<Integer, _SparseFeature> spVcts = new HashMap<Integer, _SparseFeature>();

    int dim = vcts.size();
    for (int i = 0; i < dim; i++) {
      HashMap<Integer, Double> vPtr = vcts.get(i);
      if (vPtr == null || vPtr.isEmpty())
        continue; // it is possible that we are missing this dimension

      // iterate through all the features in this section
      for (Map.Entry<Integer, Double> pair : vPtr.entrySet()) {
        int index = pair.getKey();
        double value = pair.getValue();
        // single lookup instead of containsKey+get; values are never null here
        _SparseFeature spV = spVcts.get(index);
        if (spV == null) {
          spV = new _SparseFeature(index, value, dim);
          spVcts.put(index, spV);
        } else {
          spV.addValue(value); // increase the total value
        }
        spV.setValue4Dim(value, i); // also record this section's individual contribution
      }
    }

    _SparseFeature[] resultVct = spVcts.values().toArray(new _SparseFeature[spVcts.size()]);
    Arrays.sort(resultVct); // downstream merge-walk code relies on index-sorted order
    return resultVct;
  }
Пример #3
0
  /**
   * Dot product of a dense vector and a sparse vector.
   *
   * <p>Assumes {@code sf} is non-empty and sorted by index, so its last element carries the
   * largest index — TODO confirm callers guarantee this.
   *
   * @param vct dense vector, indexed by feature id
   * @param sf sparse features whose indices address {@code vct}
   * @return sum over features of vct[index] * value
   */
  public static double dotProduct(double[] vct, _SparseFeature[] sf) {
    // BUG FIX: valid indices are 0..vct.length-1, so an index equal to vct.length is
    // already out of bounds; the original guard used '>' and missed that case.
    if (sf[sf.length - 1].getIndex() >= vct.length)
      System.err.print("Error in computing dot product between a sparse vector and a full vector");

    double value = 0;
    for (_SparseFeature fv : sf) value += vct[fv.getIndex()] * fv.getValue();
    return value;
  }
Пример #4
0
 /**
  * Converts a feature map into liblinear's Feature[] representation.
  *
  * @param spVct feature index -> value map
  * @return liblinear feature nodes, indices shifted to be 1-based
  */
 public static Feature[] createLibLinearFV(HashMap<Integer, Double> spVct) {
   Feature[] nodes = new Feature[spVct.size()];
   int pos = 0;
   for (_SparseFeature fv : createSpVct(spVct)) {
     // svm's feature index starts from 1
     nodes[pos] = new FeatureNode(fv.getIndex() + 1, fv.getValue());
     pos++;
   }
   return nodes;
 }
Пример #5
0
  /**
   * Returns the L2 (Euclidean) norm of a sparse vector: sqrt(sum of value^2).
   *
   * @param fs the sparse vector; may be null
   * @return the L2 norm, or 0 for a null input
   */
  public static double sumOfFeaturesL2(_SparseFeature[] fs) {
    if (fs == null) return 0;

    double squaredSum = 0;
    for (int i = 0; i < fs.length; i++) {
      double v = fs[i].getValue();
      squaredSum += v * v;
    }
    return Math.sqrt(squaredSum);
  }
Пример #6
0
 /**
  * Projects a sparse vector onto the feature subset given by the filter's keys. Added by Lin.
  *
  * @param fv the source sparse vector
  * @param filter features to keep, keyed by feature index
  * @return copies of the surviving features, or null when none survive
  */
 public static _SparseFeature[] projectSpVct(
     _SparseFeature[] fv, HashMap<Integer, String> filter) {
   ArrayList<_SparseFeature> kept = new ArrayList<_SparseFeature>();
   for (_SparseFeature f : fv) {
     if (!filter.containsKey(f.getIndex())) continue; // drop features outside the filter
     kept.add(new _SparseFeature(f.getIndex(), f.getValue()));
   }
   // callers use the null return to detect an empty projection
   return kept.isEmpty() ? null : kept.toArray(new _SparseFeature[kept.size()]);
 }
Пример #7
0
 /**
  * L2-normalizes the sparse vector in place: each value is divided by the vector's L2 norm.
  * A zero-norm vector is zeroed out entirely instead (avoids division by zero).
  *
  * @param fs the sparse vector to normalize in place
  */
 public static void L2Normalization(_SparseFeature[] fs) {
   double norm = sumOfFeaturesL2(fs);
   for (_SparseFeature f : fs) {
     // zero norm means every component is zero anyway; write 0.0 explicitly
     f.setValue(norm > 0 ? f.getValue() / norm : 0.0);
   }
 }
Пример #8
0
  /**
   * Converts a sparse vector into liblinear's Feature[] representation, optionally appending
   * a constant bias term.
   *
   * @param spVct the sparse vector to convert
   * @param fSize total feature-space size; when positive, a bias node with index fSize+1 and
   *     value 1.0 is appended
   * @return liblinear feature nodes, indices shifted to be 1-based
   */
  public static Feature[] createLibLinearFV(_SparseFeature[] spVct, int fSize) {
    boolean withBias = fSize > 0; // positive fSize requests a trailing bias term
    Feature[] nodes = new Feature[withBias ? spVct.length + 1 : spVct.length];

    int pos = 0;
    for (_SparseFeature fv : spVct) {
      // svm's feature index starts from 1
      nodes[pos] = new FeatureNode(fv.getIndex() + 1, fv.getValue());
      pos++;
    }
    if (withBias) nodes[pos] = new FeatureNode(fSize + 1, 1.0);
    return nodes;
  }
Пример #9
0
  /**
   * Sums a list of sparse vectors element-wise into one index-sorted sparse vector.
   *
   * @param vcts the sparse vectors to merge
   * @return the element-wise sum as a sorted sparse vector
   */
  public static _SparseFeature[] MergeSpVcts(ArrayList<_SparseFeature[]> vcts) {
    HashMap<Integer, Double> merged = new HashMap<Integer, Double>();

    for (_SparseFeature[] vector : vcts) {
      for (_SparseFeature feature : vector) {
        int idx = feature.getIndex();
        // single lookup: null means this index has not been seen yet
        Double accumulated = merged.get(idx);
        merged.put(idx, accumulated == null ? feature.getValue() : accumulated + feature.getValue());
      }
    }
    return Utils.createSpVct(merged);
  }
Пример #10
0
 // probabilities of topic assignment
 /**
  * Accumulates sufficient statistics for the topic-word distribution (sstat) from one
  * document's per-sentence topic-assignment probabilities.
  *
  * <p>For each word in each sentence, adds frequency * P(topic i | sentence) to
  * sstat[i][word]. The topic probability is summed over 'constant' blocks of p_dwzpsi,
  * stored at offsets i + j * number_of_topics — presumably these blocks correspond to
  * different latent state configurations (e.g. transition indicators); verify against the
  * model that fills p_dwzpsi.
  *
  * @param d the document whose statistics are accumulated into sstat
  */
 void accPhiStat(_Doc d) {
   double prob;
   for (int t = 0; t < d.getSenetenceSize(); t++) {
     _Stn s = d.getSentence(t);
     for (_SparseFeature f : s.getFv()) {
       int wid = f.getIndex();
       double v = f.getValue(); // frequency
       for (int i = 0; i < number_of_topics; i++) {
         // sum topic i's probability across all 'constant' blocks of p_dwzpsi[t]
         prob = this.p_dwzpsi[t][i];
         for (int j = 1; j < constant; j++) prob += this.p_dwzpsi[t][i + j * number_of_topics];
         this.sstat[i][wid] += v * prob;
       }
     }
   }
 }
Пример #11
0
  /**
   * Dot-product similarity between two index-sorted sparse vectors.
   *
   * @param spVct1 first sparse vector; may be null
   * @param spVct2 second sparse vector; may be null
   * @return the dot product, or 0 when either vector is null
   */
  public static double calculateSimilarity(_SparseFeature[] spVct1, _SparseFeature[] spVct2) {
    if (spVct1 == null || spVct2 == null) return 0; // What is the minimal value of similarity?

    double dot = 0;
    int i = 0, j = 0;
    // classic sorted-merge walk: advance the pointer holding the smaller index
    while (i < spVct1.length && j < spVct2.length) {
      int idx1 = spVct1[i].getIndex();
      int idx2 = spVct2[j].getIndex();
      if (idx1 < idx2) {
        i++;
      } else if (idx1 > idx2) {
        j++;
      } else {
        dot += spVct1[i].getValue() * spVct2[j].getValue();
        i++;
        j++;
      }
    }
    return dot;
  }
Пример #12
0
  /**
   * Jaccard similarity of the index sets of two index-sorted sparse vectors:
   * |A ∩ B| / |A ∪ B|.
   *
   * @param spVct1 first sparse vector; may be null
   * @param spVct2 second sparse vector; may be null
   * @return similarity in [0, 1]; 0 when either vector is null or both are empty
   */
  public static double jaccard(_SparseFeature[] spVct1, _SparseFeature[] spVct2) {
    if (spVct1 == null || spVct2 == null) return 0; // What is the minimal value of similarity?

    double overlap = 0;
    int pointer1 = 0, pointer2 = 0;
    while (pointer1 < spVct1.length && pointer2 < spVct2.length) {
      _SparseFeature temp1 = spVct1[pointer1];
      _SparseFeature temp2 = spVct2[pointer2];
      if (temp1.getIndex() == temp2.getIndex()) {
        overlap++;
        pointer1++;
        pointer2++;
      } else if (temp1.getIndex() > temp2.getIndex()) pointer2++;
      else pointer1++;
    }
    // BUG FIX: Jaccard divides by the union size |A|+|B|-overlap, not |A|+|B|;
    // the original returned 0.5 for identical vectors and NaN for two empty ones.
    double union = spVct1.length + spVct2.length - overlap;
    return union == 0 ? 0 : overlap / union;
  }
Пример #13
0
  /**
   * Element-wise difference x_i - x_j of two index-sorted sparse vectors.
   *
   * <p>Features present in only one vector are carried over (negated for spVctj); features
   * present in both whose difference is effectively zero are dropped.
   *
   * @param spVcti minuend sparse vector; may be null
   * @param spVctj subtrahend sparse vector; may be null
   * @return the sparse difference; null when both inputs are null
   */
  public static _SparseFeature[] diffVector(_SparseFeature[] spVcti, _SparseFeature[] spVctj) {
    // first deal with special case
    if (spVcti == null && spVctj == null) return null;
    else if (spVctj == null) return spVcti;
    else if (spVcti == null) return negSpVct(spVctj);

    ArrayList<_SparseFeature> vectorList = new ArrayList<_SparseFeature>();
    int i = 0, j = 0;
    // BUG FIX: the original eagerly read spVcti[0]/spVctj[0] before the loop, which threw
    // ArrayIndexOutOfBoundsException for a non-null zero-length input; the values were
    // overwritten inside the loop anyway, so the initialization was dead code.

    // merge walk over both index-sorted vectors
    while (i < spVcti.length && j < spVctj.length) {
      _SparseFeature fi = spVcti[i];
      _SparseFeature fj = spVctj[j];

      if (fi.getIndex() == fj.getIndex()) {
        double fv = fi.getValue() - fj.getValue();
        // Double.MIN_VALUE is the smallest positive double, so this keeps any non-zero diff
        if (Math.abs(fv) > Double.MIN_VALUE)
          vectorList.add(new _SparseFeature(fi.getIndex(), fv));
        i++;
        j++;
      } else if (fi.getIndex() > fj.getIndex()) {
        vectorList.add(new _SparseFeature(fj.getIndex(), -fj.getValue()));
        j++;
      } else {
        vectorList.add(new _SparseFeature(fi.getIndex(), fi.getValue()));
        i++;
      }
    }

    // leftovers appear in only one of the two vectors
    while (i < spVcti.length) {
      _SparseFeature fi = spVcti[i++];
      vectorList.add(new _SparseFeature(fi.getIndex(), fi.getValue()));
    }

    while (j < spVctj.length) {
      _SparseFeature fj = spVctj[j++];
      vectorList.add(new _SparseFeature(fj.getIndex(), -fj.getValue()));
    }

    return vectorList.toArray(new _SparseFeature[vectorList.size()]);
  }
Пример #14
0
 /**
  * Returns the L1 norm of a sparse vector: sum of absolute values.
  *
  * @param fs the sparse vector; may be null
  * @return the L1 norm, or 0 for a null input
  */
 public static double sumOfFeaturesL1(_SparseFeature[] fs) {
   // CONSISTENCY FIX: sumOfFeaturesL2 returns 0 for null; this overload previously threw NPE.
   if (fs == null) return 0;

   double sum = 0;
   for (_SparseFeature feature : fs) sum += Math.abs(feature.getValue());
   return sum;
 }