/**
 * Loads numerical attribute statistics for un-keyed data from a delimited text file.
 *
 * <p>Each line is split with {@code delim} and is expected to carry, in order:
 * (0) attribute ordinal, (1) conditioning attribute, (2) sum, (3) sum of squares, (4) count,
 * (5) mean, (6) variance, (7) standard deviation, (8) min, (9) max. Fields 1 through 7 are
 * stored in a {@code Tuple} (in that order) which is appended to the list kept under the
 * attribute ordinal in {@code stats}. Fields 8 and 9 (min, max) are not read here.
 *
 * <p>The reader, and with it the underlying stream, is closed before the constructor returns,
 * whether loading succeeds or fails.
 *
 * @param config configuration handed to {@code Utility.getFileStream} to open the file
 * @param statsFilePath path of the stats file
 * @param delim field delimiter; passed to {@link String#split(String)}, so it is interpreted
 *     as a regular expression
 * @throws IOException if the file cannot be read or closed
 */
public NumericalAttrStatsManager(Configuration config, String statsFilePath, String delim)
    throws IOException {
  InputStream fs = Utility.getFileStream(config, statsFilePath);
  BufferedReader reader = new BufferedReader(new InputStreamReader(fs));
  try {
    String line;
    while ((line = reader.readLine()) != null) {
      String[] items = line.split(delim);

      // (0)attr ord (1)cond attr (2)sum (3)sum square (4)count (5)mean (6)variance (7)std dev
      Integer attr = Integer.parseInt(items[0]);
      Tuple tuple = new Tuple();
      tuple.add(Tuple.STRING, items[1]);
      tuple.add(Tuple.DOUBLE, items[2]);
      tuple.add(Tuple.DOUBLE, items[3]);
      tuple.add(Tuple.INT, items[4]);
      tuple.add(Tuple.DOUBLE, items[5]);
      tuple.add(Tuple.DOUBLE, items[6]);
      tuple.add(Tuple.DOUBLE, items[7]);

      // Several lines may share an attribute ordinal, so the tuples are kept in a list
      // that is created the first time the ordinal is seen.
      List<Tuple> statList = stats.get(attr);
      if (null == statList) {
        statList = new ArrayList<Tuple>();
        stats.put(attr, statList);
      }
      statList.add(tuple);
    }
  } finally {
    // Closing the reader also closes the wrapped InputStreamReader and input stream.
    reader.close();
  }
}
/**
 * Loads numerical attribute statistics for keyed data from a delimited text file.
 *
 * <p>Each line is split with {@code delim}. The first {@code idOrdinals.length} fields form
 * the key; they are combined into one composite key by
 * {@code Utility.join(items, 0, idOrdinals.length)}. The fields that follow are, in order:
 * attribute ordinal, conditioning attribute, sum, sum of squares, count, mean, variance,
 * standard deviation, min, max. The conditioning attribute through the standard deviation are
 * stored in a {@code Tuple} (in that order); min and max are not read here. Tuples are filed
 * in {@code keyedStats} under the composite key and then under the attribute ordinal.
 *
 * <p>The reader, and with it the underlying stream, is closed before the constructor returns,
 * whether loading succeeds or fails.
 *
 * @param config configuration handed to {@code Utility.getFileStream} to open the file
 * @param statsFilePath path of the stats file
 * @param delim field delimiter; passed to {@link String#split(String)}, so it is interpreted
 *     as a regular expression
 * @param idOrdinals key field ordinals; only the array length is used here, as the number of
 *     leading key fields on each line
 * @throws IOException if the file cannot be read or closed
 */
public NumericalAttrStatsManager(
    Configuration config, String statsFilePath, String delim, int[] idOrdinals)
    throws IOException {
  InputStream fs = Utility.getFileStream(config, statsFilePath);
  BufferedReader reader = new BufferedReader(new InputStreamReader(fs));
  try {
    final int keyLen = idOrdinals.length;
    String line;
    while ((line = reader.readLine()) != null) {
      String[] items = line.split(delim);

      // Leading key fields are collapsed into a single composite key.
      String compKey = Utility.join(items, 0, keyLen);

      // Remaining fields, starting right after the key:
      // attr ord, cond attr, sum, sum square, count, mean, variance, std dev
      int i = keyLen;
      Integer attr = Integer.parseInt(items[i++]);
      Tuple tuple = new Tuple();
      tuple.add(Tuple.STRING, items[i++]);
      tuple.add(Tuple.DOUBLE, items[i++]);
      tuple.add(Tuple.DOUBLE, items[i++]);
      tuple.add(Tuple.INT, items[i++]);
      tuple.add(Tuple.DOUBLE, items[i++]);
      tuple.add(Tuple.DOUBLE, items[i++]);
      tuple.add(Tuple.DOUBLE, items[i++]);

      // Two-level map: composite key -> attribute ordinal -> tuples. Each level is
      // created the first time it is needed.
      Map<Integer, List<Tuple>> attrStats = keyedStats.get(compKey);
      if (null == attrStats) {
        attrStats = new HashMap<Integer, List<Tuple>>();
        keyedStats.put(compKey, attrStats);
      }
      List<Tuple> statList = attrStats.get(attr);
      if (null == statList) {
        statList = new ArrayList<Tuple>();
        attrStats.put(attr, statList);
      }
      statList.add(tuple);
    }
  } finally {
    // Closing the reader also closes the wrapped InputStreamReader and input stream.
    reader.close();
  }
}