private void getnegphase() {
  /*
   * Computes the negative phase of the unsupervised RBM training algorithm.
   *
   * For details, see Dr. Hinton's paper:
   * Reducing the dimensionality of data with neural networks.
   * Science, Vol. 313, no. 5786, pp. 504-507, 28 July 2006.
   */
  // Negative phase: reconstruct the visible vector v1 from the hidden states,
  // then compute the hidden probabilities h1 from that reconstruction.

  // find the vector of v1 = sigmoid(h0 * W' + visbiases)
  Matrix negdata = poshidstates.times(vishid.transpose()); // (1 x numhid) * (numhid x numdims) = (1 x numdims)
  negdata.plusEquals(visbiases); // poshidstates*vishid' + visbiases
  double[][] tmp1 = negdata.getArray();
  for (int i = 0; i < numdims; i++) {
    tmp1[0][i] = 1 / (1 + Math.exp(-tmp1[0][i])); // logistic (sigmoid) activation
  }

  // find the vector of h1 = sigmoid(v1 * W + hidbiases)
  neghidprobs = negdata.times(vishid); // (1 x numdims) * (numdims x numhid) = (1 x numhid)
  neghidprobs.plusEquals(hidbiases);
  double[][] tmp2 = neghidprobs.getArray();
  for (int i = 0; i < numhid; i++) {
    tmp2[0][i] = 1 / (1 + Math.exp(-tmp2[0][i]));
  }

  // negative-phase statistics: v1' * h1
  negprods = negdata.transpose().times(neghidprobs); // (numdims x 1) * (1 x numhid) = (numdims x numhid)
}
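The negprods statistics computed here, together with posprods from the companion positive-phase method, feed the contrastive-divergence weight update described in the cited paper. A minimal sketch of that update, assuming the Matrix API is JAMA's (which these snippets appear to use) and a hypothetical learning rate epsilonw; this is not the original author's code:

import Jama.Matrix;

// A minimal CD-1 weight-update sketch (not the original author's code).
// posprods and negprods are the statistics from the positive and negative
// phases; epsilonw is a hypothetical learning-rate parameter.
public class CdUpdateSketch {
  static Matrix updateWeights(Matrix vishid, Matrix posprods, Matrix negprods, double epsilonw) {
    // W += epsilon * (<v0'h0> - <v1'h1>)
    return vishid.plus(posprods.minus(negprods).times(epsilonw));
  }
}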
/**
 * Calculate how many maps to run. The number of maps is the cumulative size of
 * the copy divided by the bytes per map (distcp.bytes.per.map, default
 * BYTES_PER_MAP, or -m on the command line), capped at distcp.max.map.tasks
 * (default MAX_MAPS_PER_NODE * the number of nodes in the cluster) and never
 * less than one.
 *
 * @param totalBytes Count of total bytes for the job
 * @param job The job to configure
 */
private static void setMapCount(long totalBytes, JobConf job) throws IOException {
  int numMaps = (int) (totalBytes / job.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP));
  numMaps = Math.min(
      numMaps,
      job.getInt(
          MAX_MAPS_LABEL,
          MAX_MAPS_PER_NODE * new JobClient(job).getClusterStatus().getTaskTrackers()));
  job.setNumMapTasks(Math.max(numMaps, 1));
}
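As a worked example of the clamping arithmetic, with illustrative numbers that are not DistCp's actual defaults: copying 10 GiB at 256 MiB per map asks for 40 maps, and a cap of 10 maps per node on a 3-node cluster clamps that to 30.

// A self-contained sketch of the same clamping arithmetic, with
// illustrative numbers (not DistCp's actual defaults).
public class MapCountExample {
  static int mapCount(long totalBytes, long bytesPerMap, int maxMaps) {
    int numMaps = (int) (totalBytes / bytesPerMap);
    return Math.max(Math.min(numMaps, maxMaps), 1);
  }

  public static void main(String[] args) {
    long tenGiB = 10L * 1024 * 1024 * 1024;
    long bytesPerMap = 256L * 1024 * 1024;
    // 40 maps requested, clamped to 30 (10 maps/node * 3 nodes)
    System.out.println(mapCount(tenGiB, bytesPerMap, 10 * 3)); // prints 30
  }
}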
private static void analyzeResult(FileSystem fs, int testType, long execTime, String resFileName)
    throws IOException {
  Path reduceFile;
  if (testType == TEST_TYPE_WRITE) reduceFile = new Path(WRITE_DIR, "part-00000");
  else reduceFile = new Path(READ_DIR, "part-00000");
  DataInputStream in = new DataInputStream(fs.open(reduceFile));
  BufferedReader lines = new BufferedReader(new InputStreamReader(in));
  long tasks = 0;
  long size = 0;
  long time = 0;
  float rate = 0;
  float sqrate = 0;
  String line;
  while ((line = lines.readLine()) != null) {
    StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%");
    String attr = tokens.nextToken();
    if (attr.endsWith(":tasks")) tasks = Long.parseLong(tokens.nextToken());
    else if (attr.endsWith(":size")) size = Long.parseLong(tokens.nextToken());
    else if (attr.endsWith(":time")) time = Long.parseLong(tokens.nextToken());
    else if (attr.endsWith(":rate")) rate = Float.parseFloat(tokens.nextToken());
    else if (attr.endsWith(":sqrate")) sqrate = Float.parseFloat(tokens.nextToken());
  }
  lines.close();
  // med is the mean IO rate; sqrate carries the accumulated squared rates, so
  // the standard deviation follows from sqrt(E[rate^2] - mean^2).
  double med = rate / 1000 / tasks;
  double stdDev = Math.sqrt(Math.abs(sqrate / 1000 / tasks - med * med));
  String[] resultLines = {
    "----- DFSCIOTest ----- : "
        + ((testType == TEST_TYPE_WRITE)
            ? "write"
            : (testType == TEST_TYPE_READ) ? "read" : "unknown"),
    "           Date & time: " + new Date(System.currentTimeMillis()),
    "       Number of files: " + tasks,
    "Total MBytes processed: " + size / MEGA,
    "     Throughput mb/sec: " + size * 1000.0 / (time * MEGA),
    "Average IO rate mb/sec: " + med,
    " Std IO rate deviation: " + stdDev,
    "    Test exec time sec: " + (float) execTime / 1000,
    ""
  };
  PrintStream res = new PrintStream(new FileOutputStream(new File(resFileName), true));
  for (int i = 0; i < resultLines.length; i++) {
    LOG.info(resultLines[i]);
    res.println(resultLines[i]);
  }
  res.close();
}
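The stdDev line relies on the one-pass identity Var(X) = E[X^2] - E[X]^2; the :rate and :sqrate lines appear to carry the sum and sum of squares of the per-task rates (scaled by 1000), so the reducer never needs the individual samples. A small self-contained check of that identity:

// Verifies the one-pass variance identity used above:
// Var = E[x^2] - (E[x])^2, computed from running sums only.
public class VarianceIdentity {
  public static void main(String[] args) {
    double[] rates = {10.0, 12.0, 9.5, 11.0};
    double sum = 0, sumSq = 0;
    for (double r : rates) {
      sum += r;
      sumSq += r * r;
    }
    double mean = sum / rates.length;
    double variance = sumSq / rates.length - mean * mean;
    System.out.println("mean=" + mean + " stdDev=" + Math.sqrt(variance));
  }
}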
protected void waitForOpenSlot(int maxProcessesOnNode, Reporter reporter)
    throws IOException, InterruptedException {
  while (true) {
    // sleep for a random length of time between 0 and 60 seconds
    long sleepTime = (long) (Math.random() * 1000 * 60);
    logger.info("sleeping for " + sleepTime + " ms");
    Thread.sleep(sleepTime);
    int numRunningMappers = getNumRunningMappers();
    logger.info("num running mappers: " + numRunningMappers);
    if (numRunningMappers < maxProcessesOnNode) return;
    // no slot is free yet; report progress so the task is not killed while waiting
    reporter.progress();
  }
}
public void map(
    LongWritable key,
    Text value,
    OutputCollector<DoubleWritable, DoubleWritable> output,
    Reporter reporter)
    throws IOException {
  // emit (x, sqrt(x)) for each input line; clave/valor are Spanish for key/value
  double x = Double.parseDouble(value.toString());
  DoubleWritable clave = new DoubleWritable();
  DoubleWritable valor = new DoubleWritable();
  clave.set(x);
  valor.set(Math.sqrt(x));
  output.collect(clave, valor);
}
@Override
public void reduce(
    Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output,
    Reporter reporter)
    throws IOException {
  int maxValue = Integer.MIN_VALUE;
  while (values.hasNext()) {
    maxValue = Math.max(maxValue, values.next().get());
  }
  output.collect(key, new IntWritable(maxValue));
}
private void prop2nextLayer() {
  /*
   * Computes the forward propagation step: h = sigmoid(v * W + hidbiases).
   */
  poshidprobs = data.times(vishid); // (1 x numdims) * (numdims x numhid)
  poshidprobs.plusEquals(hidbiases); // data*vishid + hidbiases
  double[][] product_tmp2 = poshidprobs.getArray();
  for (int i2 = 0; i2 < numhid; i2++) {
    // squash to (0, 1), then rescale to a 0-255 value and write it to newinput,
    // the input of the next layer
    product_tmp2[0][i2] = 1 / (1 + Math.exp(-product_tmp2[0][i2]));
    newinput[i2] = (int) (product_tmp2[0][i2] * 255.0);
  }
}
public void map(
    Text key, LongWritable value, OutputCollector<K, LongWritable> collector, Reporter reporter)
    throws IOException {
  String name = key.toString();
  long size = value.get();
  long seed = Long.parseLong(name);

  if (size == 0) return;

  reporter.setStatus("opening " + name);

  FSDataInputStream in = fs.open(new Path(DATA_DIR, name));

  try {
    for (int i = 0; i < SEEKS_PER_FILE; i++) {
      // generate a random position; floorMod avoids the negative result that
      // Math.abs(random.nextLong()) % size can produce for Long.MIN_VALUE
      long position = Math.floorMod(random.nextLong(), size);

      // seek the file to that position
      reporter.setStatus("seeking " + name);
      in.seek(position);
      byte b = in.readByte();

      // regenerate the expected byte at that position from the seed
      byte checkByte = 0;
      random.setSeed(seed);
      for (int p = 0; p <= position; p += check.length) {
        reporter.setStatus("generating data for " + name);
        if (fastCheck) {
          checkByte = (byte) random.nextInt(Byte.MAX_VALUE);
        } else {
          random.nextBytes(check);
          checkByte = check[(int) (position % check.length)];
        }
      }
      assertEquals(checkByte, b); // expected value first, then the byte actually read
    }
  } finally {
    in.close();
  }
}
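The position calculation above uses Math.floorMod rather than Math.abs(random.nextLong()) % size because Math.abs(Long.MIN_VALUE) overflows back to a negative value, so the original expression could produce a negative seek offset. A standalone demonstration of the edge case (hypothetical class name):

// Demonstrates why Math.abs(...) % size is unsafe for random positions:
// Math.abs(Long.MIN_VALUE) is still negative, so the offset could be too.
public class FloorModDemo {
  public static void main(String[] args) {
    long size = 1000;
    long worstCase = Long.MIN_VALUE; // the one nextLong() value that breaks abs()
    System.out.println(Math.abs(worstCase) % size);     // prints -808
    System.out.println(Math.floorMod(worstCase, size)); // prints 192, always in [0, size)
  }
}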
private void getposphase() {
  /*
   * Computes the positive phase of the unsupervised RBM training algorithm.
   *
   * For details, see Dr. Hinton's paper:
   * Reducing the dimensionality of data with neural networks.
   * Science, Vol. 313, no. 5786, pp. 504-507, 28 July 2006.
   */
  // Positive phase: compute the hidden probabilities h0 = sigmoid(v0 * W + hidbiases)
  poshidprobs = data.times(vishid); // (1 x numdims) * (numdims x numhid)
  poshidprobs.plusEquals(hidbiases); // data*vishid + hidbiases
  double[][] product_tmp2 = poshidprobs.getArray();
  for (int i2 = 0; i2 < numhid; i2++) {
    product_tmp2[0][i2] = 1 / (1 + Math.exp(-product_tmp2[0][i2]));
  }
  // positive-phase statistics: v0' * h0
  posprods = data.transpose().times(poshidprobs); // (numdims x 1) * (1 x numhid)

  // end of the positive phase calculation; sample the binary representation of h0
  double[][] tmp1 = poshidprobs.getArray();
  double[][] tmp2 = new double[1][numhid];
  Random randomgenerator = new Random();
  for (int i3 = 0; i3 < numhid; i3++) {
    // Bernoulli sampling with the probabilities given by poshidprobs
    tmp2[0][i3] = (tmp1[0][i3] > randomgenerator.nextDouble()) ? 1 : 0;
  }
  // poshidstates is a binary sample drawn with the probabilities in poshidprobs
  poshidstates = new Matrix(tmp2);
}
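Together, getposphase and getnegphase implement one step of contrastive divergence (CD-1) from the cited paper. In equation form (my restatement of the code, not the original comments; sigma is the logistic function both methods apply elementwise):

\sigma(x) = \frac{1}{1 + e^{-x}}

h_0 = \sigma(v_0 W + b_{\mathrm{hid}}), \qquad \hat{h}_0 \sim \mathrm{Bernoulli}(h_0) \quad (\texttt{poshidprobs},\ \texttt{poshidstates})

v_1 = \sigma(\hat{h}_0 W^{\top} + b_{\mathrm{vis}}), \qquad h_1 = \sigma(v_1 W + b_{\mathrm{hid}}) \quad (\texttt{negdata},\ \texttt{neghidprobs})

\Delta W \propto v_0^{\top} h_0 - v_1^{\top} h_1 \quad (\texttt{posprods} - \texttt{negprods})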
public static boolean stopIteration(Configuration conf) throws IOException {
  FileSystem fs = FileSystem.get(conf);
  Path preFile = new Path("preX/Result");
  Path curFile = new Path("curX/part-00000");
  if (!(fs.exists(preFile) && fs.exists(curFile))) {
    System.exit(1);
  }
  boolean stop = true;
  String line1, line2;
  BufferedReader br1 = new BufferedReader(new InputStreamReader(fs.open(preFile)));
  BufferedReader br2 = new BufferedReader(new InputStreamReader(fs.open(curFile)));
  // stop only if every component changed by at most eps since the last iteration
  while ((line1 = br1.readLine()) != null && (line2 = br2.readLine()) != null) {
    String[] str1 = line1.split("\\s+");
    String[] str2 = line2.split("\\s+");
    double preElem = Double.parseDouble(str1[1]);
    double curElem = Double.parseDouble(str2[1]);
    if (Math.abs(preElem - curElem) > eps) {
      stop = false;
      break;
    }
  }
  br1.close();
  br2.close();
  if (!stop) {
    // not converged yet: promote the current result to be the next iteration's baseline
    fs.delete(preFile, true);
    if (!fs.rename(curFile, preFile)) {
      System.exit(1);
    }
  }
  return stop;
}
@Override
public void reduce(
    IntWritable key,
    Iterator<ClusterWritable> values,
    OutputCollector<IntWritable, Text> output,
    Reporter reporter)
    throws IOException {
  float sumSimilarity = 0.0f;
  int numMovies = 0;
  float avgSimilarity = 0.0f;
  float similarity = 0.0f;
  ArrayList<String> arrl = new ArrayList<String>();
  ArrayList<Float> simArrl = new ArrayList<Float>();
  while (values.hasNext()) {
    ClusterWritable cr = values.next();
    similarity = cr.similarity;
    simArrl.addAll(cr.similarities);
    for (int i = 0; i < cr.movies.size(); i++) {
      String oneElm = cr.movies.get(i);
      // truncate long entries to avoid memory errors caused by long arrays;
      // this makes the result less accurate
      int indexShort = oneElm.indexOf(",", 1000);
      String shortline = (indexShort == -1) ? oneElm : oneElm.substring(0, indexShort);
      arrl.add(shortline);
      output.collect(key, new Text(oneElm));
    }
    numMovies += cr.movies.size();
    sumSimilarity += similarity;
  }
  if (numMovies > 0) {
    avgSimilarity = sumSimilarity / (float) numMovies;
  }
  // pick the member whose similarity is closest to the cluster average
  float minDiff = 1.0f;
  int candidate = 0;
  for (int s = 0; s < numMovies; s++) {
    float diff = (float) Math.abs(avgSimilarity - simArrl.get(s));
    if (diff < minDiff) {
      minDiff = diff;
      candidate = s;
    }
  }
  String data = arrl.get(candidate);
  int index2 = data.indexOf(":");
  String movieStr = data.substring(0, index2);
  String reviews = data.substring(index2 + 1);
  StringTokenizer token = new StringTokenizer(reviews, ",");
  int count = 0;
  while (token.hasMoreTokens()) {
    token.nextToken();
    count++;
  }
  System.out.println(
      "The key = " + key.toString()
          + " has members = " + numMovies
          + " simil = " + simArrl.get(candidate));
  Text val = new Text(simArrl.get(candidate) + " " + movieStr + " " + count + " " + reviews);
  output.collect(key, val);
  reporter.incrCounter(Counter.VALUES, 1);
}
@Override
public int getPartition(IntPair key, NullWritable value, int numPartitions) {
  // mask off the sign bit instead of calling Math.abs: Math.abs(Integer.MIN_VALUE)
  // is still negative, which would make this partitioner return an illegal index
  return (key.getFirst() * 127 & Integer.MAX_VALUE) % numPartitions;
}
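The mask matters because key.getFirst() * 127 can overflow to Integer.MIN_VALUE, the one int whose absolute value cannot be represented. A standalone check (hypothetical class name):

// Shows the Math.abs overflow edge case that the sign-bit mask above avoids.
public class PartitionOverflowDemo {
  public static void main(String[] args) {
    int h = Integer.MIN_VALUE;                        // a possible result of key.getFirst() * 127
    System.out.println(Math.abs(h) % 10);             // prints -8: an illegal partition index
    System.out.println((h & Integer.MAX_VALUE) % 10); // prints 0: always non-negative
  }
}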
public static boolean isSameTime(String begin, String end) {
  // treat timestamps within 1800 seconds (30 minutes) of each other as the same time
  long s = Math.abs(DateStringUtils.second(begin, end, DATETIME_STYLE));
  return s <= 1800;
}
public static double calAlpha(double theta, double dec) {
  // near the poles the search cone covers all right ascensions
  if (Math.abs(dec) + theta > 89.9) return 180;
  return Math.toDegrees(
      Math.abs(
          Math.atan(
              Math.sin(Math.toRadians(theta))
                  / Math.sqrt(
                      Math.cos(Math.toRadians(dec - theta))
                          * Math.cos(Math.toRadians(dec + theta))))));
}
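In formula form, calAlpha computes the half-width in right ascension of a search cone of angular radius theta centered at declination dec (my restatement of the code above, in degrees):

\alpha(\theta, \delta) =
  \arctan\!\left(
    \frac{\sin\theta}{\sqrt{\cos(\delta - \theta)\,\cos(\delta + \theta)}}
  \right),
\qquad
\alpha = 180^{\circ} \ \text{when}\ |\delta| + \theta > 89.9^{\circ}.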
public class NeighborSearch {
  public static final int numZones = 180;
  public static final int numBlocks = 360;
  public static double theta = 1.0 / 60.0;
  public static double blockWidth = 360.0 / numBlocks;
  public static double zoneHeight = 180.0 / numZones;
  private static double[][] blockRanges = new double[numBlocks][2];
  private static double[][] zoneRanges = new double[numZones][2];
  private static double[] maxAlphas = new double[numZones];
  private static double costheta = Math.cos(Math.toRadians(theta));

  public static double calAlpha(double theta, double dec) {
    if (Math.abs(dec) + theta > 89.9) return 180;
    return Math.toDegrees(
        Math.abs(
            Math.atan(
                Math.sin(Math.toRadians(theta))
                    / Math.sqrt(
                        Math.cos(Math.toRadians(dec - theta))
                            * Math.cos(Math.toRadians(dec + theta))))));
  }

  public static void init() {
    zoneRanges[0][0] = -90;
    zoneRanges[0][1] = -90 + zoneHeight;
    for (int i = 1; i < zoneRanges.length; i++) {
      zoneRanges[i][0] = zoneRanges[i - 1][1];
      zoneRanges[i][1] = zoneRanges[i][0] + zoneHeight;
    }
    blockRanges[0][0] = 0;
    blockRanges[0][1] = blockWidth;
    for (int i = 1; i < blockRanges.length; i++) {
      blockRanges[i][0] = blockRanges[i - 1][1];
      blockRanges[i][1] = blockRanges[i][0] + blockWidth;
    }
    for (int i = 0; i < maxAlphas.length; i++) {
      double maxDec = zoneRanges[i][1];
      if (maxDec <= 0) maxDec = zoneRanges[i][0];
      maxAlphas[i] = calAlpha(theta, maxDec);
    }
  }

  public static class Map extends MapReduceBase
      implements Mapper<LongWritable, Star, BlockIDWritable, PairWritable> {
    /* Object creation in Java is relatively costly,
     * so reuse these objects across map invocations. */
    private BlockIDWritable loc = new BlockIDWritable();
    private PairWritable p = new PairWritable();
    BlockIDWritable loc1 = new BlockIDWritable();

    public Map() {
      init();
    }

    public void map(
        LongWritable key,
        Star value,
        OutputCollector<BlockIDWritable, PairWritable> output,
        Reporter reporter)
        throws IOException {
      loc.set(value.ra, value.dec);
      int zoneNum = loc.zoneNum;
      int raNum = loc.raNum;
      p.set(value, null);

      /*
       * When the block size increases (> theta), only part of a block
       * needs to be copied to its neighbor.
       */
      output.collect(loc, p);

      /*
       * Only replicate objects near the border of a block; most
       * objects should not need to be copied.
       */
      if (value.dec > zoneRanges[zoneNum][0] + theta
          && value.dec < zoneRanges[zoneNum][1] - theta
          && value.ra > blockRanges[raNum][0] + maxAlphas[zoneNum]
          && value.ra < blockRanges[raNum][1] - maxAlphas[zoneNum]) return;

      /*
       * The code below copies the star to some neighbors. We only
       * need to copy an object to the bottom, left, left-bottom,
       * and left-top neighbors.
       */
      value.margin = true;

      /*
       * We treat the entire zone 0 as one block, so we only need to
       * copy some objects at the corner to their neighbors.
       */
      if (loc.zoneNum == 0) {
        /* copy the object to the right top neighbor */
        if (value.ra >= blockRanges[raNum][1] - maxAlphas[zoneNum]
            && value.ra <= blockRanges[raNum][1]
            && value.dec >= zoneRanges[zoneNum][1] - theta
            && value.dec <= zoneRanges[zoneNum][1]) {
          // BlockIDWritable loc1 = new BlockIDWritable();
          /* raNum of objects in zone 0 is always 0,
           * so we need to recalculate it.
           */
          // loc1.raNum = BlockIDWritable.ra2Num(value.ra) + 1;
          // if (loc1.raNum == numBlocks) {
          //   loc1.raNum = 0;
          //   value.ra -= 360;
          // }
          // loc1.zoneNum = loc.zoneNum + 1;
          // output.collect(loc1, p);
        }
        return;
      } else if (loc.zoneNum == numZones - 1) {
        /* copy the object to the bottom neighbor */
        if (value.dec >= zoneRanges[zoneNum][0]
            && value.dec <= zoneRanges[zoneNum][0] + theta) {
          /* raNum of objects in zone zoneNum - 1 is always 0,
           * so we need to recalculate it. */
          loc1.raNum = BlockIDWritable.ra2Num(value.ra);
          loc1.zoneNum = loc.zoneNum - 1;
          output.collect(loc1, p);
          /* copy the object to the right bottom neighbor */
          while (value.ra >= blockRanges[loc1.raNum][1] - maxAlphas[zoneNum]
              && value.ra <= blockRanges[loc1.raNum][1]) {
            loc1.raNum++;
            if (loc1.raNum == numBlocks) {
              loc1.raNum = 0;
              value.ra -= 360;
            }
            loc1.zoneNum = loc.zoneNum - 1;
            output.collect(loc1, p);
          }
        }
        return;
      }

      boolean wrap = false;
      loc1.raNum = loc.raNum;
      /* copy the object to the right neighbor */
      while (value.ra >= blockRanges[loc1.raNum][1] - maxAlphas[zoneNum]
          && value.ra <= blockRanges[loc1.raNum][1]) {
        loc1.raNum++;
        loc1.zoneNum = loc.zoneNum;
        /*
         * When the object is copied to the right neighbor, we need to
         * be careful: convert ra and raNum if ra is close to 360.
         */
        if (loc1.raNum == numBlocks) {
          loc1.raNum = 0;
          value.ra -= 360;
          wrap = true;
        }
        output.collect(loc1, p);
        /* copy the object to the right bottom neighbor */
        if (value.dec >= zoneRanges[zoneNum][0]
            && value.dec <= zoneRanges[zoneNum][0] + theta) {
          loc1.zoneNum = loc.zoneNum - 1;
          output.collect(loc1, p);
        }
        /* copy the object to the right top neighbor */
        if (value.dec >= zoneRanges[zoneNum][1] - theta
            && value.dec <= zoneRanges[zoneNum][1]) {
          loc1.zoneNum = loc.zoneNum + 1;
          output.collect(loc1, p);
        }
      }
      if (wrap) {
        value.ra += 360;
      }
      /* copy the object to the bottom neighbor */
      if (value.dec >= zoneRanges[zoneNum][0]
          && value.dec <= zoneRanges[zoneNum][0] + theta) {
        loc1.raNum = loc.raNum;
        loc1.zoneNum = loc.zoneNum - 1;
        if (loc1.zoneNum == 0) loc1.raNum = 0;
        output.collect(loc1, p);
      }
    }
  }

  public static class Reduce extends MapReduceBase
      implements Reducer<BlockIDWritable, PairWritable, BlockIDWritable, PairWritable> {
    PairWritable p = new PairWritable();

    public Reduce() {
      init();
    }

    void search(
        Vector<Star> v1,
        Vector<Star> v2,
        BlockIDWritable key,
        OutputCollector<BlockIDWritable, PairWritable> output)
        throws IOException {
      for (int i = 0; i < v1.size(); i++) {
        for (int j = 0; j < v2.size(); j++) {
          Star star1 = v1.get(i);
          Star star2 = v2.get(j);
          // skip pairs where both stars are replicated margin copies,
          // so each pair is reported by exactly one block
          if (star1.margin && star2.margin) continue;

          double dist = star1.x * star2.x + star1.y * star2.y + star1.z * star2.z;
          if (dist > costheta) {
            p.set(star1, star2, dist);
            output.collect(key, p);
            p.set(star2, star1, dist);
            output.collect(key, p);
            // num += 2;
          }
        }
      } // end for i,j
    }

    public void reduce(
        BlockIDWritable key,
        Iterator<PairWritable> values,
        OutputCollector<BlockIDWritable, PairWritable> output,
        Reporter reporter)
        throws IOException {
      int buketsizeX = 0;
      int buketsizeY = 0;
      double bwidth = maxAlphas[key.zoneNum]; // bucket width along ra (x)
      double bheight = theta; // bucket height along dec (y)
      /* add 10 more in each dimension to make sure there is no overflow.
       */
      @SuppressWarnings("unchecked")
      Vector<Star>[][] arrstarV =
          new Vector[((int) (zoneHeight / bheight)) + 10]
              [((int) (blockWidth / bwidth)) + 10]; // bucket grid, indexed [y][x]
      int num = 0;
      while (values.hasNext()) {
        num++;
        Star s = values.next().get(0); // participant
        double posx = (s.ra - blockRanges[key.raNum][0]) / bwidth;
        int x = (int) posx + 1; // shift by 1 in case the star comes from another block
        double posy = (s.dec - zoneRanges[key.zoneNum][0]) / bheight;
        int y = (int) posy + 1;
        // track the largest occupied bucket indices
        if (buketsizeX < x) buketsizeX = x;
        if (buketsizeY < y) buketsizeY = y;
        // create the bucket on demand
        if (arrstarV[y][x] == null)
          // TODO avoid creating vectors here.
          arrstarV[y][x] = new Vector<Star>();
        // put the star into its bucket
        arrstarV[y][x].add(s);
      }
      // compare stars within each bucket, then against four neighboring buckets
      int i, j, row, col;
      for (row = 0; row <= buketsizeY; row++) {
        for (col = 0; col <= buketsizeX; col++) {
          // TODO we need to avoid searching objects in the border.
          if (arrstarV[row][col] != null) {
            // all pairs within this bucket
            for (i = 0; i < arrstarV[row][col].size(); i++) {
              for (j = i + 1; j < arrstarV[row][col].size(); j++) {
                Star star1 = arrstarV[row][col].get(i);
                Star star2 = arrstarV[row][col].get(j);
                // skip pairs where both stars are replicated margin copies
                if (star1.margin && star2.margin) continue;

                double dist = star1.x * star2.x + star1.y * star2.y + star1.z * star2.z;
                if (dist > costheta) {
                  p.set(star1, star2, dist);
                  output.collect(key, p);
                  p.set(star2, star1, dist);
                  output.collect(key, p);
                  // num += 2;
                }
              }
            } // end for i,j
          } else {
            continue;
          }
          // 4 more neighbors
          // right upper: arrstarV[row-1][col+1] vs arrstarV[row][col]
          if (row != 0 && arrstarV[row - 1][col + 1] != null) {
            search(arrstarV[row][col], arrstarV[row - 1][col + 1], key, output);
          }
          // right: arrstarV[row][col+1] vs arrstarV[row][col]
          if (arrstarV[row][col + 1] != null) {
            search(arrstarV[row][col], arrstarV[row][col + 1], key, output);
          }
          // right lower
          if (arrstarV[row + 1][col + 1] != null) {
            search(arrstarV[row][col], arrstarV[row + 1][col + 1], key, output);
          }
          // lower
          if (arrstarV[row + 1][col] != null) {
            search(arrstarV[row][col], arrstarV[row + 1][col], key, output);
          }
        } // end column
      } // end row
    }
  }

  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(NeighborSearch.class);
    conf.setJobName("star searching");

    conf.setOutputKeyClass(BlockIDWritable.class);
    conf.setOutputValueClass(PairWritable.class);

    conf.setMapperClass(Map.class);
    // conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
    // conf.setPartitionerClass(BlockPartitioner.class);
    // conf.setFloat("mapred.reduce.slowstart.completed.maps", (float) 1.0);

    conf.setInputFormat(StarInputFormat.class);
    conf.setOutputFormat(StarOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
  }
}
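The dist > costheta test works because the Star fields x, y, z evidently hold a unit vector on the celestial sphere: the dot product of two unit vectors is the cosine of their angular separation, so comparing cosines finds all pairs within theta without a trigonometric call per pair. A self-contained sketch of the test, using a hypothetical helper that converts ra/dec to a unit vector:

// Demonstrates the cosine test used in NeighborSearch: two unit vectors are
// within angular distance theta iff their dot product exceeds cos(theta).
public class CosineTestDemo {
  // hypothetical helper: convert (ra, dec) in degrees to a unit vector
  static double[] unitVector(double ra, double dec) {
    double raR = Math.toRadians(ra), decR = Math.toRadians(dec);
    return new double[] {
      Math.cos(decR) * Math.cos(raR), Math.cos(decR) * Math.sin(raR), Math.sin(decR)
    };
  }

  public static void main(String[] args) {
    double theta = 1.0 / 60.0; // 1 arcminute, as in NeighborSearch
    double costheta = Math.cos(Math.toRadians(theta));
    double[] a = unitVector(10.0, 20.0);
    double[] b = unitVector(10.01, 20.005); // roughly 0.6 arcminute away
    double dist = a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
    System.out.println(dist > costheta); // true: the pair is a neighbor
  }
}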