/**
 * Loads optional job-level setup files before any record is mapped.
 *
 * <p>Nothing happens unless the {@code minibatch.job.setup} flag is set to {@code true} in the
 * job configuration. When it is set, every file localized through the distributed cache is
 * handed to {@code parseJobSetup}.
 *
 * <p>A failure to list the cached files is reported on standard error and treated as
 * "no files"; it does not abort the task.
 *
 * @param context the task context supplying the job configuration
 */
@Override
public void setup(GibbsSampling.Context context) {
    Configuration conf = context.getConfiguration();
    if (!conf.getBoolean("minibatch.job.setup", false)) {
        return;
    }

    Path[] jobSetupFiles = null;
    try {
        jobSetupFiles = DistributedCache.getLocalCacheFiles(conf);
    } catch (IOException ioe) {
        System.err.println(
            "Caught exception while getting cached files: "
                + StringUtils.stringifyException(ioe));
    }

    // getLocalCacheFiles returns null (not an empty array) when no files were localized,
    // so iterating over its result unguarded would throw a NullPointerException.
    if (jobSetupFiles == null) {
        return;
    }

    for (Path jobSetup : jobSetupFiles) {
        parseJobSetup(jobSetup);
    }
}
// TODO: DOUBLECHECK EVERYTHING @Override public void map(Text key, jBLASArrayWritable input, GibbsSampling.Context context) throws IOException, InterruptedException { /* *******************************************************************/ /* initialize all memory we're going to use during the process */ long start_time = System.nanoTime(); ArrayList<DoubleMatrix> data = input.getData(); label = data.get(4); v_data = data.get(5); // check to see if we are in the first layer or there are layers beneath us we must sample // from if (data.size() > 6) { int prelayer = (data.size() - 6) / 3; DoubleMatrix[] preWeights = new DoubleMatrix[prelayer], preHbias = new DoubleMatrix[prelayer], preVbias = new DoubleMatrix[prelayer]; for (int i = 0; i < prelayer; i++) { preWeights[i] = data.get(6 + i * 3); preHbias[i] = data.get(7 + i * 3); preVbias[i] = data.get(8 + i * 3); } DoubleMatrix vnew = null; for (int i = 0; i < prelayer; i++) { weights = preWeights[i]; vbias = preVbias[i]; hbias = preHbias[i]; vnew = sample_h_from_v(i == 0 ? 
v_data : vnew); } v_data = vnew; } weights = data.get(0); hbias = data.get(1); hiddenChain = data.get(2); vbias = data.get(3); // check if we need to attach labels to the observed variables if (vbias.columns != v_data.columns) { DoubleMatrix labels = DoubleMatrix.zeros(1, classCount); int labelNum = (new Double(label.get(0))).intValue(); labels.put(labelNum, 1.0); v_data = DoubleMatrix.concatHorizontally(v_data, labels); } w1 = DoubleMatrix.zeros(weights.rows, weights.columns); hb1 = DoubleMatrix.zeros(hbias.rows, hbias.columns); vb1 = DoubleMatrix.zeros(vbias.rows, vbias.columns); /* ********************************************************************/ // sample hidden state to get positive phase // if empty, use it as the start of the chain // or use persistent hidden state from pCD DoubleMatrix phaseSample = sample_h_from_v(v_data); h1_data = new DoubleMatrix(); v1_data = new DoubleMatrix(); if (hiddenChain == null) { data.set(2, new DoubleMatrix(hbias.rows, hbias.columns)); hiddenChain = data.get(2); hiddenChain.copy(phaseSample); h1_data.copy(phaseSample); } else { h1_data.copy(hiddenChain); } // run Gibbs chain for k steps for (int j = 0; j < gibbsSteps; j++) { v1_data.copy(sample_v_from_h(h1_data)); h1_data.copy(sample_h_from_v(v1_data)); } DoubleMatrix hprob = propup(v1_data); weight_contribution(hiddenChain, v_data, hprob, v1_data); hiddenChain.copy(h1_data); data.get(0).copy(w1); data.get(1).copy(hb1); data.get(2).copy(hiddenChain); data.get(3).copy(vb1); jBLASArrayWritable outputmatrix = new jBLASArrayWritable(data); context.write(key, outputmatrix); log.info("Job completed in: " + (System.nanoTime() - start_time) / (1E6) + " ms"); }