Пример #1
   * @param row
   * @return - array of counts of one-bits for each cell in the row.
  public int[] getRowBitCount(int row) {
    int[] bits = new int[columns()];
    for (FlexCompRowMatrix cell : this.matrix) {
      SparseVector vector = cell.getRow(row);

       * Sparse vector: has indices (>0 except for first position) saying where values are; data are the values.
      double[] data = vector.getData();
      int[] indices = vector.getIndex();
      for (int j = 0; j < data.length; j++) {
        if (indices[j] == 0 && j > 0) break;
        if (data[j] != 0.0) bits[indices[j]] += countBits(data[j]);
    return bits;
  public static void main(String... args) throws Exception {

     * The inside and outside projection matrices I suppose
    Object[] matrices = new Object[2];

     * Data structure to hold all the sparse vectors
    // ArrayList<SparseVector> phiList = new ArrayList<SparseVector>();
    // ArrayList<SparseVector> psiList = new ArrayList<SparseVector>();

     * Used to normalize the trees

    PTBTreeNormaliser treeNormalizer = new PTBTreeNormaliser(true);

     * Getting the feature dictionary path, i.e. the serialized file path.
     * This dictionary will be used to form the feature vectors.
    String featureDictionary = null;

     * This variable tells the code about the directory path where parse
     * trees are stored from which feature vectors need to be extracted
     * corresponding to all the nodes
    String parsedTreeCorpus = null;

     * The feature dictionary that needs to be used while extracting
     * features
    featureDictionary = "/disk/scratch/s1444025/worddictionary/worddictionary.ser";
     * The directory that holds the parse trees that are iterated over to
     * extract the feature vector corresponding to the nodes
    parsedTreeCorpus = "/afs/inf.ed.ac.uk/group/project/vsm.restored/trees";
     * Necessary to get the appropriate directory structure
    // countMapLoc =
    // "/afs/inf.ed.ac.uk/group/project/vsm/countmapnodesamples/countMap.ser";

     * Getting the serialised dictionary bean object that contains the
     * inside and outside feature dictionaries which are used to form the
     * feature vectors
    VSMWordDictionaryBean dictionaryBean =

     * Getting the inside and outside feature dictionaries, that are used
     * for forming the feature vectors
    System.out.println("***Getting word dictionary*****");
    Alphabet wordDictionary = dictionaryBean.getWordDictionary();
    // System.out.println(wordDictionary.size());

    dprime = wordDictionary.size();
    d = wordDictionary.size();

    SparseMatrixLil PsiTPsi = new SparseMatrixLil(dprime, dprime);
    SparseMatrixLil PsiTPhi = new SparseMatrixLil(dprime, d);
    SparseMatrixLil PhiTPhi = new SparseMatrixLil(d, d);
    SparseMatrixLil PhiTPsi = new SparseMatrixLil(d, dprime);

     * The parsed tree corpus from where the feature vectors need to be
     * extracted corresponding to all the nodes
    File[] files =
        new File(parsedTreeCorpus)
                new FileFilter() {

                  public boolean accept(File file) {
                    return !file.isHidden();

    ArrayList<String> filePaths = VSMUtil.getFilePaths(files);

     * The object that is used to serialize the feature vector bean. The
     * feature vector bean storing the inside and outside feature vectors
     * corresponding to a particular node in a tree. Each feature vector
     * bean holds the feature vectors for one particular node
    VSMSerializeFeatureVectorBeanWord serializeBean = null;

     * If we already have a serialized count map object then we would want
     * to start from where we left
    // File fileCountMap = new File(countMapLoc);

    serializeBean = new VSMSerializeFeatureVectorBeanWord();
    // } else {
    // VSMCountMap countMapObj = VSMReadSerialCountMap
    // .readCountMapObj(countMapLoc);
    // System.out.println("inside the count map***");
    // serializeBean = new VSMSerializeFeatureVectorBeanWord(
    // countMapObj.getCountMap());
    // }

     * Getting the data structure to store all the feature vectors in it. We
     * are taking 300000 samples for a particular non-terminal
    SparseMatrixLil Phi = new SparseMatrixLil(300000, d);
    SparseMatrixLil Psi = new SparseMatrixLil(300000, dprime);

    int count = 0;

    for (String filePath : filePaths) {

       * Getting an iterator over the trees in the file
      PennTreeReader treeReader = VSMUtil.getTreeReader(filePath);

       * Iterating over all the trees
      while (treeReader.hasNext()) {

         * The syntax tree
        Tree<String> syntaxTree = null;

         * Unmatched parentheses exception. Does this mean that the
         * BLLIP corpus sometimes does not have correct parse trees?
         * Strange
        try {
          syntaxTree = treeReader.next();
        } catch (RuntimeException e) {
          System.out.println("exception" + e + " ::tree  " + syntaxTree);

         * Do stuff only if the syntax tree is a valid one
        if (syntaxTree != null) {

           * Process the syntax tree to remove the top bracket
          syntaxTree = treeNormalizer.process(syntaxTree);

           * Iterator over the nodes of the tree
          Iterator<Tree<String>> nodeTrees = syntaxTree.iterator();

           * Sparse Inside and outside feature vectors declared
          no.uib.cipr.matrix.sparse.SparseVector psi = null;
          no.uib.cipr.matrix.sparse.SparseVector phi = null;
          Tree<String> insideTree = null;

           * Iterating over all the nodes in a particular syntax tree
          while (nodeTrees.hasNext()) {

             * This is the inside tree for which we want to form a
             * feature vector and store it in the map
            insideTree = nodeTrees.next();

             * Only do stuff if inside tree is not a leaf
            if (!insideTree.isLeaf() && insideTree.getLabel().equalsIgnoreCase("NNS")) {

               * Setting the object's properties that are stored
               * in the .ser file
              VSMWordFeatureVectorBean vectorBean = new VSMWordFeatureVectorBean();

                  "****Extracting inside and outside feature vectors for node****  "
                      + insideTree.getLabel());

               * Getting the inside and outside feature vectors
               * corresponding to the particular node

              psi =
                  new VSMOutsideFeatureVectorWords()
                          syntaxTree, insideTree, wordDictionary, vectorBean);
              // psiList.add(psi);

              phi =
                  new VSMInsideFeatureVectorWords()
                      .getInsideFeatureVectorPhi(insideTree, wordDictionary, vectorBean);
              // phiList.add(phi);

              System.out.println("got the sparse vectors*** ");

               * Inside sparse matrix formation for the particular
               * node.

               * Do the below operation only if both psi and phi
               * are not null for the given node sample and also
               * if either psi or phi are different than before
               * for this sample, if both are same then no need
               * to unnecessarily fill up Psi and Phi

              if (phi != null && psi != null) {


                System.out.println("****Filling in the matrices***");

                int[] indicesPhi = phi.getIndex();
                double[] valuesPhi = phi.getData();
                 * Don't need the phi anymore in this iteration
                phi = null;
                 * Putting the inside feature vector into the
                 * inside feature matrix
                for (int i = 0; i < indicesPhi.length; i++) {
                  Phi.append(count, indicesPhi[i], valuesPhi[i]);
                indicesPhi = null;
                valuesPhi = null;

                 * Outside sparse matrix formation for the
                 * particular node
                int[] indicesPsi = psi.getIndex();
                double[] valuesPsi = psi.getData();
                psi = null;

                 * Putting the outside feature vector into the
                 * outside feature matrix
                for (int j = 0; j < indicesPsi.length; j++) {
                  Psi.append(count, indicesPsi[j], valuesPsi[j]);
                indicesPsi = null;
                valuesPsi = null;

                 * Storing the feature vectors in a bean which
                 * will be serialized for future use

                 * Serialize the feature vector bean
                 * corresponding to the particular node. The
                 * feature vector bean contains the sparse
                 * inside and outside feature vectors
                System.out.println("***Serialized the feature vector***");


                 * Break when count reaches the last row of Psi (Psi.rows - 1)
                if (count == (Psi.rows - 1)) {
                  break mainloop;

     * Call the CCA function here

    System.out.println("*****Done with matrices formation****");

     * Just calculating the covariance, assuming that the data is centered
     * and normalized

    System.out.println("***Calculating Covariances****");
    PsiTPsi = Psi.t().mmul(Psi); // d' \times d'
    PsiTPhi = Psi.t().mmul(Phi); // d' \times d
    PhiTPhi = Phi.t().mmul(Phi); // d \times d
    PhiTPsi = Phi.t().mmul(Psi); // d \times d'
    System.out.println("****Done with it***");

     * Log and square root transform
    PsiTPsi =
    PsiTPhi =
    PhiTPhi =
    PhiTPsi =

     * Writing the co-variance matrices in a text file to see what's going
     * on
    System.out.println("****Writing the Covarinace Matrices to the file***");
    VSMUtil.writeCovarMatrixSem(PsiTPsi, "NNS");
    VSMUtil.writeCovarMatrixSem(PsiTPhi, "NNS");
    VSMUtil.writeCovarMatrixSem(PhiTPhi, "NNS");
    VSMUtil.writeCovarMatrixSem(PhiTPsi, "NNS");

     * Done with the Psi and Phi and freeing up some space
    Psi = null;
    Phi = null;
     * Getting the similarity scores SVD template object that has utility
     * methods to do preprocessing before performing CCA
    SVDTemplates1 svdTC = new SVDTemplates1(null);

     * Function to compute the CCA, passing the covariance matrices to the
     * function

     * Writing the projection matrices out in a file to see what is in there
    matrices = VSMUtil.deserializeCCAVariantsRunSem("NNS");
    VSMUtil.writeEigenDictInsideSemantic(matrices, "NNS", d);
    VSMUtil.writeEigenDictOutsideSem(matrices, "NNS", dprime);
    matrices = null;
    PsiTPhi = null;
    PhiTPhi = null;
    PsiTPsi = null;
    PhiTPsi = null;

     * We would also like to serialize the count map. The count map is the
     * data structure that helps us store the .ser files in proper
     * directories with proper names. So, if in future we want to extract
     * feature vectors corresponding to more parse trees, we will start from
     * where we left in the directory structure and file name

     * Getting the updated count map
    // countMap = VSMSerializeFeatureVectorBean.getCountMap();
    // /*
    // * The object that will be serialized
    // */
    // VSMCountMap countMapObject = new VSMCountMap();
    // countMapObject.setCountMap(countMap);
    // /*
    // * Serialize count map
    // */
    // VSMSerializeCountMap.serializeCountMap(countMapObject);
    // System.out.println("*****count map serialized****");