/**
 * Runs the external LDA inference binary on {@code testDocs} and turns its output into a
 * score matrix.
 *
 * <p>Steps: write the test documents to {@code lda/test.dat}, invoke the {@code lda inf}
 * command, read the resulting gamma matrix and the alpha value, subtract alpha from every gamma
 * entry, and multiply the adjusted gammas by {@code betaMatrix}.
 *
 * @param testDocs documents to score; passed unchanged to {@code createLdaInputTest}
 * @return the product (gamma - alpha) x beta, copied into a plain two-dimensional array
 */
@Override
public double[][] predict(List<PredictionPaper> testDocs) {
  final String testData = "lda/test.dat";
  createLdaInputTest(testData, testDocs);

  String inferenceCommand =
      "lib/lda-c-dist/lda inf "
          + " lib/lda-c-dist/settings.txt "
          + "lda/final "
          + testData
          + " lda/output";
  Utils.runCommand(inferenceCommand, false);

  double[][] gammaValues = Utils.readMatrix("lda/output-gamma.dat", false);
  double alpha = Utils.readAlpha("lda/final.other");

  // Shift every gamma entry down by alpha, in place.
  for (double[] gammaRow : gammaValues) {
    for (int col = 0; col < gammaRow.length; col++) {
      gammaRow[col] -= alpha;
    }
  }

  SimpleMatrix probabilities =
      new SimpleMatrix(gammaValues).mult(new SimpleMatrix(betaMatrix));

  // Copy the matrix product into a plain array for the caller.
  int rowCount = probabilities.numRows();
  int colCount = probabilities.numCols();
  double[][] result = new double[rowCount][colCount];
  for (int r = 0; r < rowCount; r++) {
    double[] target = result[r];
    for (int c = 0; c < colCount; c++) {
      target[c] = probabilities.get(r, c);
    }
  }
  return result;
}
public static List<Cluster> ReassignCentrids( List<Cluster> kCentroids, SimpleMatrix distanceMatrix, SimpleMatrix dataSet, int[] featureSet) { List<Cluster> kCentroids_l = kCentroids; int[] clusterLoc = new int[dataSet.numRows()]; for (int iRows = 0; iRows < dataSet.numRows(); iRows++) { int clusterNo = 1; double minvalue = distanceMatrix.get(iRows, 1); for (int iCentroid = 0; iCentroid < kCentroids_l.size(); iCentroid++) { // System.out.println(iRows+" "+iCentroid); if (distanceMatrix.get(iRows, iCentroid) < minvalue) { clusterNo = iCentroid; minvalue = distanceMatrix.get(iRows, iCentroid); } } clusterLoc[iRows] = clusterNo; } // Backup Centroids anc clear current centroids for (int i = 0; i < kCentroids_l.size(); i++) { kCentroids_l.get(i).backup(); kCentroids_l.get(i).noPoints = 0; Arrays.fill(kCentroids_l.get(i).currPoints, -1); Arrays.fill(kCentroids_l.get(i).intIndex, -1); // System.out.println("Clusters Backed Up!"); } // printKCentroids(kCentroids_l); for (int i = 0; i < clusterLoc.length; i++) { int insLoc = kCentroids_l.get(clusterLoc[i]).noPoints; // System.out.println("Getting element"+dataSet.get(i, 0)); kCentroids_l.get(clusterLoc[i]).currPoints[insLoc] = (int) dataSet.get(i, 0); kCentroids_l.get(clusterLoc[i]).intIndex[insLoc] = i; kCentroids_l.get(clusterLoc[i]).noPoints++; } // System.out.println(Arrays.toString(clusterLoc)); // Now Calculate the best representative and give as new centroid values for (int i = 0; i < kCentroids.size(); i++) { for (int j = 0; j < featureSet.length; j++) { double tempAvg = 0; int[] travVector = kCentroids_l.get(i).intIndex; for (int k = 0; k < travVector.length; k++) { if (travVector[k] != -1) { tempAvg = tempAvg + dataSet.get(travVector[k], featureSet[j]); } } kCentroids.get(i).clusterCenter.set(0, featureSet[j], tempAvg / kCentroids.get(i).noPoints); } } return kCentroids_l; }
/**
 * Finds the values of a, b, c which minimize
 *
 * <p>sum (a*x(+)_i + b*y(+)_i + c - x(-)_i)^2
 *
 * <p>where (x(+)_i, y(+)_i) is {@code p1} of observation i transformed by {@code Hzero} and
 * x(-)_i is the x-coordinate of {@code p2} transformed by {@code H}. See page 306.
 *
 * @param observations associated point pairs; one equation is built per pair
 * @param H transform applied to {@code p2} of each pair
 * @param Hzero transform applied to {@code p1} of each pair
 * @return Affine transform: a 3x3 identity whose first row is replaced by the solution (a, b, c)
 */
private SimpleMatrix computeAffineH(
    List<AssociatedPair> observations, DenseMatrix64F H, DenseMatrix64F Hzero) {
  final int pairCount = observations.size();
  SimpleMatrix design = new SimpleMatrix(pairCount, 3);
  SimpleMatrix target = new SimpleMatrix(design.numRows(), 1);

  // Scratch points reused for every observation.
  Point2D_F64 viaH = new Point2D_F64();
  Point2D_F64 viaHzero = new Point2D_F64();

  int row = 0;
  for (AssociatedPair pair : observations) {
    GeometryMath_F64.mult(Hzero, pair.p1, viaHzero);
    GeometryMath_F64.mult(H, pair.p2, viaH);

    design.setRow(row, 0, viaHzero.x, viaHzero.y, 1);
    target.set(row, 0, viaH.x);
    row++;
  }

  SimpleMatrix solution = design.solve(target);

  // Only the first row differs from the identity.
  SimpleMatrix affine = SimpleMatrix.identity(3);
  affine.setRow(0, 0, solution.getMatrix().data);
  return affine;
}
// ecuatia Rosenbrock // x(i) is coded with 8 bits public double fitness(SimpleMatrix cromosom) { int value = 0; int n = cromosom.numRows(); double x1, x2; for (int i = 0; i < n - 1; i++) { x1 = cromosom.get(i); x2 = cromosom.get(i + 1); value += (100 * Math.pow(x2 - Math.pow(x1, 2), 2) + Math.pow(x1 - 1, 2)); } return value; }
/**
 * Builds the matrix of Euclidean distances between every data row and every centroid,
 * restricted to the selected feature columns.
 *
 * @param kCentroids clusters whose {@code clusterCenter} row 0 holds the centre coordinates
 * @param dataSet data rows to measure
 * @param featureSet column indices used in the distance
 * @param distanceMetric not consulted by this method; the distance is always Euclidean
 * @return matrix with one row per data row and one column per centroid
 */
public static SimpleMatrix compDist(
    List<Cluster> kCentroids, SimpleMatrix dataSet, int[] featureSet, String distanceMetric) {
  final int rowCount = dataSet.numRows();
  final int centroidCount = kCentroids.size();
  SimpleMatrix distances = new SimpleMatrix(rowCount, centroidCount);

  for (int c = 0; c < centroidCount; c++) {
    Cluster centroid = kCentroids.get(c);
    for (int r = 0; r < rowCount; r++) {
      double squaredSum = 0;
      for (int feature : featureSet) {
        double centreValue = centroid.clusterCenter.get(0, feature);
        double rowValue = dataSet.get(r, feature);
        squaredSum += Math.pow(centreValue - rowValue, 2);
      }
      distances.set(r, c, Math.sqrt(squaredSum));
    }
  }
  return distances;
}
public static void main(String[] args) { try { if (args.length < 5) { System.out.println( "Invalid Syntax usage. \n The Syntax is KMeans inputfile distance-metric #Centroids #Iterations Tolerance featureSet crossValidation testDataSet"); } String inputFile = args[0]; String distanceMetric = args[1]; int centroids = Integer.parseInt(args[2]); int iterations = Integer.parseInt(args[3]); double tolerance = Double.parseDouble(args[4]); String inpFeatures = args[5]; String crossValidation = args[6]; String testFile = args[7]; String[] temp = inpFeatures.split(","); int[] featureSet = new int[temp.length]; boolean hasConvered = false; for (int i = 0; i < temp.length; i++) { featureSet[i] = Integer.parseInt(temp[i]); } System.out.println("Features Considered are" + Arrays.toString(featureSet)); SimpleMatrix dataSet = new SimpleMatrix().loadCSV(inputFile); // Cluster Parameters int dsRows = dataSet.numRows(); int dsCol = dataSet.numCols(); // Cluster Initialization List<Cluster> kcentroids = new ArrayList<Cluster>(); for (int i = 0; i < centroids; i++) { int random = genRandom(0, dsRows - 1); int[] currPoints = new int[dsRows]; int[] intIndex = new int[dsRows]; // This will have remnance of the first chosen element Random r = new Random(); SimpleMatrix t = new SimpleMatrix().random(1, dsCol, 0, 0, r); Cluster centers = new Cluster(i, dataSet.extractVector(true, random), t, currPoints, intIndex); kcentroids.add(centers); } // printKCentroids(kcentroids); // dataSet.print(); SimpleMatrix distMatrix = compDist(kcentroids, dataSet, featureSet, distanceMetric); // distMatrix.print(); // System.out.println(distMatrix.get(0, 1)); // ReassignCentrids(kcentroids, distMatrix, dataSet,featureSet); // printKCentroids(kcentroids); for (int k = 0; k < iterations; k++) { System.out.println("------------------------Iteration " + k + " ------------------------"); distMatrix = compDist(kcentroids, dataSet, featureSet, distanceMetric); // distMatrix.print(); ReassignCentrids(kcentroids, 
distMatrix, dataSet, featureSet); printKCentroids(kcentroids); hasConvered = detectConvergence(kcentroids, featureSet, tolerance); // System.out.println(hasConvered); if (hasConvered) { System.out.println( "------------------------Has Converged : Tolerance------------------------"); break; } } if (!hasConvered) System.out.println( "------------------------Has Converged : Iterations------------------------"); if (crossValidation.equals("true")) { SimpleMatrix test = new SimpleMatrix().loadCSV(testFile); SimpleMatrix dist = compDist(kcentroids, test, featureSet, distanceMetric); System.out.println("----------Associating Set Data Sets to Calculated Centroid-------"); // dist.print(); List<Cluster> kCentroids_l = kcentroids; int[] clusterLoc = new int[test.numRows()]; for (int iRows = 0; iRows < test.numRows(); iRows++) { int clusterNo = 1; double minvalue = dist.get(iRows, 1); for (int iCentroid = 0; iCentroid < kCentroids_l.size(); iCentroid++) { // System.out.println(iRows+" "+iCentroid); if (dist.get(iRows, iCentroid) < minvalue) { clusterNo = iCentroid; minvalue = dist.get(iRows, iCentroid); } } clusterLoc[iRows] = clusterNo; } // System.out.println(Arrays.toString(clusterLoc)); for (int i = 0; i < kCentroids_l.size(); i++) { kCentroids_l.get(i).backup(); kCentroids_l.get(i).noPoints = 0; Arrays.fill(kCentroids_l.get(i).currPoints, -1); Arrays.fill(kCentroids_l.get(i).intIndex, -1); // System.out.println("Clusters Backed Up!"); } // printKCentroids(kCentroids_l); for (int i = 0; i < clusterLoc.length; i++) { int insLoc = kCentroids_l.get(clusterLoc[i]).noPoints; // System.out.println("Getting element"+dataSet.get(i, 0)); kCentroids_l.get(clusterLoc[i]).currPoints[insLoc] = (int) test.get(i, 0); kCentroids_l.get(clusterLoc[i]).intIndex[insLoc] = i; kCentroids_l.get(clusterLoc[i]).noPoints++; } printKCentroids(kCentroids_l); } } catch (Exception e) { System.out.println("Unfortunately There is an error."); e.printStackTrace(); } }
public void backpropDerivative( Tree tree, List<String> words, IdentityHashMap<Tree, SimpleMatrix> nodeVectors, TwoDimensionalMap<String, String, SimpleMatrix> binaryW_dfs, Map<String, SimpleMatrix> unaryW_dfs, TwoDimensionalMap<String, String, SimpleMatrix> binaryScoreDerivatives, Map<String, SimpleMatrix> unaryScoreDerivatives, Map<String, SimpleMatrix> wordVectorDerivatives, SimpleMatrix deltaUp) { if (tree.isLeaf()) { return; } if (tree.isPreTerminal()) { if (op.trainOptions.trainWordVectors) { String word = tree.children()[0].label().value(); word = dvModel.getVocabWord(word); // SimpleMatrix currentVector = nodeVectors.get(tree); // SimpleMatrix currentVectorDerivative = // nonlinearityVectorToDerivative(currentVector); // SimpleMatrix derivative = deltaUp.elementMult(currentVectorDerivative); SimpleMatrix derivative = deltaUp; wordVectorDerivatives.put(word, wordVectorDerivatives.get(word).plus(derivative)); } return; } SimpleMatrix currentVector = nodeVectors.get(tree); SimpleMatrix currentVectorDerivative = NeuralUtils.elementwiseApplyTanhDerivative(currentVector); SimpleMatrix scoreW = dvModel.getScoreWForNode(tree); currentVectorDerivative = currentVectorDerivative.elementMult(scoreW.transpose()); // the delta that is used at the current nodes SimpleMatrix deltaCurrent = deltaUp.plus(currentVectorDerivative); SimpleMatrix W = dvModel.getWForNode(tree); SimpleMatrix WTdelta = W.transpose().mult(deltaCurrent); if (tree.children().length == 2) { // TODO: RS: Change to the nice "getWForNode" setup? 
String leftLabel = dvModel.basicCategory(tree.children()[0].label().value()); String rightLabel = dvModel.basicCategory(tree.children()[1].label().value()); binaryScoreDerivatives.put( leftLabel, rightLabel, binaryScoreDerivatives.get(leftLabel, rightLabel).plus(currentVector.transpose())); SimpleMatrix leftVector = nodeVectors.get(tree.children()[0]); SimpleMatrix rightVector = nodeVectors.get(tree.children()[1]); SimpleMatrix childrenVector = NeuralUtils.concatenateWithBias(leftVector, rightVector); if (op.trainOptions.useContextWords) { childrenVector = concatenateContextWords(childrenVector, tree.getSpan(), words); } SimpleMatrix W_df = deltaCurrent.mult(childrenVector.transpose()); binaryW_dfs.put(leftLabel, rightLabel, binaryW_dfs.get(leftLabel, rightLabel).plus(W_df)); // and then recurse SimpleMatrix leftDerivative = NeuralUtils.elementwiseApplyTanhDerivative(leftVector); SimpleMatrix rightDerivative = NeuralUtils.elementwiseApplyTanhDerivative(rightVector); SimpleMatrix leftWTDelta = WTdelta.extractMatrix(0, deltaCurrent.numRows(), 0, 1); SimpleMatrix rightWTDelta = WTdelta.extractMatrix(deltaCurrent.numRows(), deltaCurrent.numRows() * 2, 0, 1); backpropDerivative( tree.children()[0], words, nodeVectors, binaryW_dfs, unaryW_dfs, binaryScoreDerivatives, unaryScoreDerivatives, wordVectorDerivatives, leftDerivative.elementMult(leftWTDelta)); backpropDerivative( tree.children()[1], words, nodeVectors, binaryW_dfs, unaryW_dfs, binaryScoreDerivatives, unaryScoreDerivatives, wordVectorDerivatives, rightDerivative.elementMult(rightWTDelta)); } else if (tree.children().length == 1) { String childLabel = dvModel.basicCategory(tree.children()[0].label().value()); unaryScoreDerivatives.put( childLabel, unaryScoreDerivatives.get(childLabel).plus(currentVector.transpose())); SimpleMatrix childVector = nodeVectors.get(tree.children()[0]); SimpleMatrix childVectorWithBias = NeuralUtils.concatenateWithBias(childVector); if (op.trainOptions.useContextWords) { 
childVectorWithBias = concatenateContextWords(childVectorWithBias, tree.getSpan(), words); } SimpleMatrix W_df = deltaCurrent.mult(childVectorWithBias.transpose()); // System.out.println("unary backprop derivative for " + childLabel); // System.out.println("Old transform:"); // System.out.println(unaryW_dfs.get(childLabel)); // System.out.println(" Delta:"); // System.out.println(W_df.scale(scale)); unaryW_dfs.put(childLabel, unaryW_dfs.get(childLabel).plus(W_df)); // and then recurse SimpleMatrix childDerivative = NeuralUtils.elementwiseApplyTanhDerivative(childVector); // SimpleMatrix childDerivative = childVector; SimpleMatrix childWTDelta = WTdelta.extractMatrix(0, deltaCurrent.numRows(), 0, 1); backpropDerivative( tree.children()[0], words, nodeVectors, binaryW_dfs, unaryW_dfs, binaryScoreDerivatives, unaryScoreDerivatives, wordVectorDerivatives, childDerivative.elementMult(childWTDelta)); } }