/** Check whether the file list has duplicates. */
private static void checkDuplication(FileSystem fs, Path file, Path sorted, Configuration conf)
    throws IOException {
  SequenceFile.Reader in = null;
  try {
    SequenceFile.Sorter sorter =
        new SequenceFile.Sorter(fs, new Text.Comparator(), Text.class, Text.class, conf);
    sorter.sort(file, sorted);

    in = new SequenceFile.Reader(fs, sorted, conf);
    Text prevdst = null, curdst = new Text();
    Text prevsrc = null, cursrc = new Text();
    for (; in.next(curdst, cursrc); ) {
      if (prevdst != null && curdst.equals(prevdst)) {
        throw new DuplicationException(
            "Invalid input, there are duplicated files in the sources: "
                + prevsrc + ", " + cursrc);
      }
      prevdst = curdst;
      curdst = new Text();
      prevsrc = cursrc;
      cursrc = new Text();
    }
  } finally {
    checkAndClose(in);
  }
}
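// The method above relies on sorting to make equal destination names adjacent, so a
// single linear scan over neighboring records finds any duplicate. A minimal standalone
// sketch of that sort-then-scan pattern on plain strings (hypothetical helper, not part
// of the source above):
import java.util.Arrays;
import java.util.List;

class AdjacentDuplicateSketch {
  /** Returns the first duplicated item, or null if all items are distinct. */
  static String findDuplicate(List<String> items) {
    String[] sorted = items.toArray(new String[0]);
    Arrays.sort(sorted); // after sorting, equal keys become neighbors
    for (int i = 1; i < sorted.length; i++) {
      if (sorted[i].equals(sorted[i - 1])) {
        return sorted[i]; // an adjacent equal pair is a duplicate
      }
    }
    return null;
  }

  public static void main(String[] args) {
    System.out.println(findDuplicate(Arrays.asList("b", "a", "b"))); // prints b
  }
}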
/* Load the document centers from HDFS into memory. */
protected void setup(Context context) throws IOException, InterruptedException {
  // read the center-point vector data
  Configuration conf = context.getConfiguration();
  Path cents = new Path(CENT_PATH);
  // FileSystem fs = FileSystem.get(conf);
  FileSystem fs = cents.getFileSystem(conf);
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, cents, conf);
  Text key = new Text(); // the question ID
  Text value = new Text(); // the word=TFIDF, word=TFIDF pairs for that question ID
  while (reader.next(key, value)) {
    Map<String, Double> tfidfAndword = new HashMap<String, Double>(); // word -> TFIDF
    String[] strs = null;
    Pattern p = Pattern.compile("\"([^\"]+)\"=([^,}]+)"); // regex extracting word and TFIDF
    Matcher m = p.matcher(value.toString());
    while (m.find()) {
      strs = m.group().split("=");
      if (strs.length == 2) {
        tfidfAndword.put(
            strs[0].replace("\"", "").trim(),
            Double.parseDouble(strs[1].replace("}", "").trim()));
      }
    }
    centers.put(key.toString(), tfidfAndword);
  }
  reader.close();
  super.setup(context);
}
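// The pattern above pulls "word"=score pairs out of a map-style string such as
// {"alpha"=0.5, "beta"=1.25}. A self-contained sketch of the same parse using the
// regex capture groups directly (sample input invented for illustration):
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

class TfidfParseSketch {
  public static void main(String[] args) {
    String value = "{\"alpha\"=0.5, \"beta\"=1.25}"; // invented sample record
    Pattern p = Pattern.compile("\"([^\"]+)\"=([^,}]+)");
    Matcher m = p.matcher(value);
    Map<String, Double> tfidf = new HashMap<>();
    while (m.find()) {
      // group(1) is the quoted word, group(2) the numeric TFIDF score
      tfidf.put(m.group(1).trim(), Double.parseDouble(m.group(2).trim()));
    }
    System.out.println(tfidf); // e.g. {alpha=0.5, beta=1.25} (map order may vary)
  }
}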
/**
 * @param path
 * @param job
 * @param fs
 * @param dest
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @throws IOException
 * @throws IllegalAccessException
 * @throws InstantiationException
 */
@SuppressWarnings("deprecation")
private void readBinaryBlockMatrixBlocksFromHDFS(
    Path path, JobConf job, FileSystem fs, Collection<IndexedMatrixValue> dest,
    long rlen, long clen, int brlen, int bclen)
    throws IOException {
  MatrixIndexes key = new MatrixIndexes();
  MatrixBlock value = new MatrixBlock();

  // set up preferred custom serialization framework for binary block format
  if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
    MRJobConfiguration.addBinaryBlockSerializationFramework(job);

  for (Path lpath : getSequenceFilePaths(fs, path)) // 1..N files
  {
    // directly read from sequence files (individual partfiles)
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
    try {
      while (reader.next(key, value)) {
        int row_offset = (int) (key.getRowIndex() - 1) * brlen;
        int col_offset = (int) (key.getColumnIndex() - 1) * bclen;
        int rows = value.getNumRows();
        int cols = value.getNumColumns();

        // bound check per block
        if (row_offset + rows < 0 || row_offset + rows > rlen
            || col_offset + cols < 0 || col_offset + cols > clen) {
          throw new IOException(
              "Matrix block [" + (row_offset + 1) + ":" + (row_offset + rows) + ","
                  + (col_offset + 1) + ":" + (col_offset + cols) + "] "
                  + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        }

        // copy block to result
        dest.add(new IndexedMatrixValue(new MatrixIndexes(key), new MatrixBlock(value)));
      }
    } finally {
      IOUtilFunctions.closeSilently(reader);
    }
  }
}
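// The offsets above follow the usual 1-based blocked layout: block row index i covers
// rows (i-1)*brlen+1 through (i-1)*brlen+rows of the overall matrix. A quick worked
// check with invented sizes:
class BlockOffsetSketch {
  public static void main(String[] args) {
    int brlen = 1000;  // invented block size
    long rowIndex = 3; // 1-based block row index, as in MatrixIndexes
    int rows = 1000;   // rows actually present in this block
    int rowOffset = (int) (rowIndex - 1) * brlen;
    // block 3 covers rows 2001..3000 of the overall matrix (1-based)
    System.out.println("covers rows " + (rowOffset + 1) + ".." + (rowOffset + rows));
  }
}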
/**
 * Return the sorted x*y products read from the sequence file at the given path.
 *
 * @param url
 * @return
 */
public Double[] getR(String url) {
  List<Double> list = new ArrayList<Double>();
  Path path = new Path(url);
  Configuration conf = HUtils.getConf();
  SequenceFile.Reader reader = null;
  try {
    reader =
        new SequenceFile.Reader(
            conf, Reader.file(path), Reader.bufferSize(4096), Reader.start(0));
    DoubleArrStrWritable dkey =
        (DoubleArrStrWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    DoublePairWritable dvalue =
        (DoublePairWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    while (reader.next(dkey, dvalue)) { // loop over the file's records
      // collect x*y per record; accessors assumed to be getFirst()/getSecond() as
      // DoublePairWritable is used in getXY (the line here was originally commented
      // out as getSum()*getDistance(), leaving the result array always empty)
      list.add(dvalue.getFirst() * dvalue.getSecond());
    }
  } catch (Exception e) {
    e.printStackTrace();
  } finally {
    IOUtils.closeStream(reader);
  }
  Double[] dList = new Double[list.size()];
  dList = list.toArray(dList);
  Arrays.sort(dList);
  return dList;
}
/** determines which files have failed for a given job */
private Set<String> getFailedFiles(Job job) throws IOException {
  Set<String> failedFiles = new HashSet<String>();
  Path outDir = SequenceFileOutputFormat.getOutputPath(job);
  FileSystem fs = outDir.getFileSystem(getConf());
  if (!fs.getFileStatus(outDir).isDir()) {
    throw new IOException(outDir.toString() + " is not a directory");
  }
  FileStatus[] files = fs.listStatus(outDir);
  for (FileStatus f : files) {
    Path fPath = f.getPath();
    if ((!f.isDir()) && (fPath.getName().startsWith(PART_PREFIX))) {
      LOG.info("opening " + fPath.toString());
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, fPath, getConf());
      Text key = new Text();
      Text value = new Text();
      while (reader.next(key, value)) {
        failedFiles.add(key.toString());
      }
      reader.close();
    }
  }
  return failedFiles;
}
/** debugging TODO remove */
private void readOutputFiles(String jobName, Path outDir) throws IOException {
  FileSystem fs = outDir.getFileSystem(getConf());
  if (!fs.getFileStatus(outDir).isDir()) {
    throw new IOException(outDir.toString() + " is not a directory");
  }
  FileStatus[] files = fs.listStatus(outDir);
  for (FileStatus f : files) {
    Path fPath = f.getPath();
    if ((!f.isDir()) && (fPath.getName().startsWith(PART_PREFIX))) {
      LOG.info("opening " + fPath.toString());
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, fPath, getConf());
      Text key = new Text();
      Text value = new Text();
      while (reader.next(key, value)) {
        LOG.info("read " + f.getPath().toString());
        LOG.info("read: k=" + key.toString() + " v=" + value.toString());
      }
      LOG.info("done reading " + fPath.toString());
      reader.close();
    }
  }
}
private int readFile() throws IllegalArgumentException, IOException {
  int count = 0;
  final FileSystem fs = FileSystem.get(MapReduceTestUtils.getConfiguration());
  final FileStatus[] fss =
      fs.listStatus(
          new Path(
              TestUtils.TEMP_DIR
                  + File.separator
                  + MapReduceTestEnvironment.HDFS_BASE_DIRECTORY
                  + "/t1/pairs"));
  for (final FileStatus ifs : fss) {
    if (ifs.isFile() && ifs.getPath().toString().matches(".*part-r-0000[0-9]")) {
      try (SequenceFile.Reader reader =
          new SequenceFile.Reader(
              MapReduceTestUtils.getConfiguration(), Reader.file(ifs.getPath()))) {
        final Text key = new Text();
        final Text val = new Text();
        while (reader.next(key, val)) {
          count++;
          System.err.println(key + "\t" + val);
        }
      }
    }
  }
  return count;
}
public static void run(Configuration conf, Path input, String outputFile)
    throws IOException, InstantiationException, IllegalAccessException {
  Writer writer;
  if (outputFile == null) {
    writer = new OutputStreamWriter(System.out);
  } else {
    writer =
        new OutputStreamWriter(
            new FileOutputStream(new File(outputFile)), Charset.forName("UTF-8"));
  }
  try {
    FileSystem fs = input.getFileSystem(conf);
    for (FileStatus fst : fs.listStatus(input, new DataPathFilter())) {
      Path dataPath = fst.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, dataPath, conf);
      try {
        Text key = reader.getKeyClass().asSubclass(Text.class).newInstance();
        DocumentMapping value = new DocumentMapping();
        while (reader.next(key, value)) {
          String docId = value.getDocId();
          writer.write(docId + "\t" + key + "\n");
        }
      } finally {
        reader.close();
      }
    }
  } finally {
    writer.close();
  }
}
private static void finalize(
    Configuration conf, JobConf jobconf, final Path destPath, String presevedAttributes)
    throws IOException {
  if (presevedAttributes == null) {
    return;
  }
  EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes);
  if (!preseved.contains(FileAttribute.USER)
      && !preseved.contains(FileAttribute.GROUP)
      && !preseved.contains(FileAttribute.PERMISSION)) {
    return;
  }
  FileSystem dstfs = destPath.getFileSystem(conf);
  Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
  SequenceFile.Reader in = null;
  try {
    in = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf), dstdirlist, jobconf);
    Text dsttext = new Text();
    FilePair pair = new FilePair();
    for (; in.next(dsttext, pair); ) {
      Path absdst = new Path(destPath, pair.output);
      updatePermissions(pair.input, dstfs.getFileStatus(absdst), preseved, dstfs);
    }
  } finally {
    checkAndClose(in);
  }
}
/**
 * Result file contains hierarchy of workerID-resultvar(incl filename). We deduplicate on the
 * workerID. Without JVM reuse each task refers to a unique workerID, so we will not find any
 * duplicates. With JVM reuse, however, each slot refers to a workerID, and there are duplicate
 * filenames due to partial aggregation and overwrite of fname (the RemoteParWorkerMapper ensures
 * uniqueness of those files independent of the runtime implementation).
 *
 * @param job
 * @param fname
 * @return
 * @throws DMLRuntimeException
 */
@SuppressWarnings("deprecation")
public static LocalVariableMap[] readResultFile(JobConf job, String fname)
    throws DMLRuntimeException, IOException {
  HashMap<Long, LocalVariableMap> tmp = new HashMap<Long, LocalVariableMap>();

  FileSystem fs = FileSystem.get(job);
  Path path = new Path(fname);
  LongWritable key = new LongWritable(); // workerID
  Text value = new Text(); // serialized var header (incl filename)

  int countAll = 0;
  for (Path lpath : MatrixReader.getSequenceFilePaths(fs, path)) {
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(job), lpath, job);
    try {
      while (reader.next(key, value)) {
        // System.out.println("key="+key.get()+", value="+value.toString());
        if (!tmp.containsKey(key.get())) tmp.put(key.get(), new LocalVariableMap());
        Object[] dat = ProgramConverter.parseDataObject(value.toString());
        tmp.get(key.get()).put((String) dat[0], (Data) dat[1]);
        countAll++;
      }
    } finally {
      if (reader != null) reader.close();
    }
  }

  LOG.debug("Num remote worker results (before deduplication): " + countAll);
  LOG.debug("Num remote worker results: " + tmp.size());

  // create return array
  return tmp.values().toArray(new LocalVariableMap[0]);
}
public static XYSeries getXY(String url) {
  XYSeries xyseries = new XYSeries("");
  Path path = new Path(url);
  Configuration conf = HUtils.getConf();
  SequenceFile.Reader reader = null;
  try {
    reader =
        new SequenceFile.Reader(
            conf, Reader.file(path), Reader.bufferSize(4096), Reader.start(0));
    DoubleArrStrWritable dkey =
        (DoubleArrStrWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    DoublePairWritable dvalue =
        (DoublePairWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    while (reader.next(dkey, dvalue)) { // loop over the file's records
      xyseries.add(dvalue.getFirst(), dvalue.getSecond());
    }
  } catch (Exception e) {
    e.printStackTrace();
  } finally {
    IOUtils.closeStream(reader);
  }
  return xyseries;
}
public void configure(JobConf conf) {
  numberOfCenters = Integer.valueOf(conf.get("numberOfCenters"));
  centersDirectory = conf.get("centersReadDirectory");
  try {
    Configuration c = new Configuration();
    FileSystem fs = FileSystem.get(c);
    for (int index = 0; index < numberOfCenters; ++index) {
      SequenceFile.Reader reader =
          new SequenceFile.Reader(fs, new Path(centersDirectory + "/centers/" + index), c);
      LongWritable key = new LongWritable();
      Point value = new Point();
      reader.next(key, value);
      Point center = (Point) value;
      centers.add(center);
      reader.close();
    }
  } catch (IOException e) {
    // do nothing
    // I hope this doesn't happen
    System.out.println("well, damn.");
    e.printStackTrace();
  }
}
public static void main(String args[]) throws Exception {
  String inputDir = "reuters";
  int k = 25;
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  String vectorsFolder = inputDir + "/tfidf-vectors";
  SequenceFile.Reader reader =
      new SequenceFile.Reader(fs, new Path(vectorsFolder + "/part-r-00000"), conf);
  List<Vector> points = new ArrayList<Vector>();
  Text key = new Text();
  VectorWritable value = new VectorWritable();
  while (reader.next(key, value)) {
    points.add(value.get());
  }
  System.out.println(points.size());
  reader.close();
  List<Vector> randomPoints = RandomPointsUtil.chooseRandomPoints(points, k);
  List<Cluster> clusters = new ArrayList<Cluster>();
  System.out.println(randomPoints.size());
  int clusterId = 0;
  for (Vector v : randomPoints) {
    clusters.add(new Cluster(v, clusterId++, new CosineDistanceMeasure()));
  }
  List<List<Cluster>> finalClusters =
      KMeansClusterer.clusterPoints(points, clusters, new CosineDistanceMeasure(), 10, 0.01);
  for (Cluster cluster : finalClusters.get(finalClusters.size() - 1)) {
    System.out.println(
        "Cluster id: " + cluster.getId() + " center: " + cluster.getCenter().asFormatString());
  }
}
public static Canopies readCanopyCenters(Configuration conf) throws IOException {
  Canopies canopies = new Canopies();
  FileSystem fs = FileSystem.get(conf);
  Path canopyFileName = new Path(Nasdaq.CANOPY_SEQ_FILE_PATH);

  // init canopies
  @SuppressWarnings("deprecation")
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, canopyFileName, conf);
  StockVector canopy = new StockVector();
  IntWritable value = new IntWritable();

  while (reader.next(canopy, value)) {
    // parse the canopy center
    StockVector canopyToAdd = new StockVector(canopy);
    // add to canopy centers
    canopies.addCanopy(canopyToAdd);
  }
  reader.close();
  // fs.close();
  return canopies;
}
public void performSequenceFileRead(FileSystem fs, int count, Path file) throws IOException {
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
  try {
    ByteWritable key = new ByteWritable();
    BytesRefArrayWritable val = new BytesRefArrayWritable();
    for (int i = 0; i < count; i++) {
      reader.next(key, val);
    }
  } finally {
    reader.close(); // release the reader once the read loop finishes
  }
}
public SeqFileInputStream(FileSystem fs, FileStatus f) throws IOException {
  r = new SequenceFile.Reader(fs, f.getPath(), getConf());
  key =
      ReflectionUtils.newInstance(
          r.getKeyClass().asSubclass(WritableComparable.class), getConf());
  val = ReflectionUtils.newInstance(r.getValueClass().asSubclass(Writable.class), getConf());
  inbuf = new DataInputBuffer();
  outbuf = new DataOutputBuffer();
}
private List<InputSplit> getSplits(Configuration configuration, int numSplits, long totalSizeBytes)
    throws IOException {
  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  long nBytesPerSplit = (long) Math.ceil(totalSizeBytes * 1.0 / numSplits);

  CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
  Text srcRelPath = new Text();
  long currentSplitSize = 0;
  long lastSplitStart = 0;
  long lastPosition = 0;

  final Path listingFilePath = getListingFilePath(configuration);

  if (LOG.isDebugEnabled()) {
    LOG.debug(
        "Average bytes per map: "
            + nBytesPerSplit
            + ", Number of maps: "
            + numSplits
            + ", total size: "
            + totalSizeBytes);
  }
  SequenceFile.Reader reader = null;
  try {
    reader = getListingFileReader(configuration);
    while (reader.next(srcRelPath, srcFileStatus)) {
      // If adding the current file would push this split past the per-map byte
      // limit, cut the split here and start the current file in a new split.
      if (currentSplitSize + srcFileStatus.getLen() > nBytesPerSplit && lastPosition != 0) {
        FileSplit split =
            new FileSplit(listingFilePath, lastSplitStart, lastPosition - lastSplitStart, null);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Creating split : " + split + ", bytes in split: " + currentSplitSize);
        }
        splits.add(split);
        lastSplitStart = lastPosition;
        currentSplitSize = 0;
      }
      currentSplitSize += srcFileStatus.getLen();
      lastPosition = reader.getPosition();
    }
    if (lastPosition > lastSplitStart) {
      FileSplit split =
          new FileSplit(listingFilePath, lastSplitStart, lastPosition - lastSplitStart, null);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Creating split : " + split + ", bytes in split: " + currentSplitSize);
      }
      splits.add(split);
    }
  } finally {
    IOUtils.closeStream(reader);
  }
  return splits;
}
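// The loop above is a greedy packer: it accumulates file lengths until the next file
// would overflow the per-split budget, then cuts the split at the last record boundary.
// A standalone sketch of the same packing rule over plain file sizes (sizes invented
// for illustration):
import java.util.ArrayList;
import java.util.List;

class GreedySplitSketch {
  public static void main(String[] args) {
    long[] sizes = {40, 30, 50, 20, 60}; // invented file lengths
    long budget = 70;                    // bytes allowed per split
    List<List<Long>> splits = new ArrayList<>();
    List<Long> current = new ArrayList<>();
    long used = 0;
    for (long s : sizes) {
      if (used + s > budget && !current.isEmpty()) {
        splits.add(current); // cut the split before this file
        current = new ArrayList<>();
        used = 0;
      }
      current.add(s);
      used += s;
    }
    if (!current.isEmpty()) splits.add(current); // trailing split
    System.out.println(splits); // [[40, 30], [50, 20], [60]]
  }
}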
private static ClusterClassifier readClassifier(Configuration config, Path path, FileSystem fs)
    throws IOException {
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, config);
  Writable key = new Text();
  ClusterClassifier classifierOut = new ClusterClassifier();
  try {
    reader.next(key, classifierOut);
  } finally {
    Closeables.closeQuietly(reader);
  }
  return classifierOut;
}
private static List<IDistanceDensityMul> getIDistanceDensityMulList(String url)
    throws FileNotFoundException, IOException {
  Configuration conf = HUtils.getConf();
  SequenceFile.Reader reader = null;
  // merged result across part files; must stay sorted
  List<IDistanceDensityMul> allList = new ArrayList<IDistanceDensityMul>();
  FileStatus[] fss =
      HUtils.getHDFSPath(url, "true")
          .getFileSystem(conf)
          .listStatus(HUtils.getHDFSPath(url, "true"));
  for (FileStatus f : fss) {
    if (!f.toString().contains("part")) {
      continue; // skip files other than the part-* outputs
    }
    // records from the current part file only (a fresh list per file, so earlier
    // files' records do not accumulate into the merge below)
    List<IDistanceDensityMul> fileList = new ArrayList<IDistanceDensityMul>();
    try {
      reader =
          new SequenceFile.Reader(
              conf, Reader.file(f.getPath()), Reader.bufferSize(4096), Reader.start(0));
      // <density_i*min_distance_j> <first:density_i,second:min_distance_j,third:i>
      // DoubleWritable, IntDoublePairWritable
      CustomDoubleWritable dkey =
          (CustomDoubleWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      IntDoublePairWritable dvalue =
          (IntDoublePairWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
      int i = Utils.GETDRAWPICRECORDS_EVERYFILE;
      while (reader.next(dkey, dvalue) && i > 0) { // loop over the file's records
        i--;
        fileList.add(
            new IDistanceDensityMul(
                dvalue.getSecond(), dvalue.getFirst(), dvalue.getThird(), dkey.get()));
        // each part file is already sorted in ascending order
      }
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      IOUtils.closeStream(reader);
    }
    // merge in the first records (Utils.GETDRAWPICRECORDS_EVERYFILE of them) of this file
    if (allList.size() <= 0) { // first file: take everything
      allList.addAll(fileList);
    } else {
      combineLists(allList, fileList);
    }
  } // for
  // the first point is far too large, so drop it
  return allList.subList(1, allList.size());
}
private Map<Text, CopyListingFileStatus> getListing(Path listingPath) throws Exception {
  SequenceFile.Reader reader =
      new SequenceFile.Reader(conf, SequenceFile.Reader.file(listingPath));
  Text key = new Text();
  CopyListingFileStatus value = new CopyListingFileStatus();
  Map<Text, CopyListingFileStatus> values = new HashMap<>();
  while (reader.next(key, value)) {
    values.put(key, value);
    key = new Text();
    value = new CopyListingFileStatus();
  }
  reader.close(); // close the listing reader once fully consumed
  return values;
}
private List<String> getKeyFromSequenceFile(FileSystem fs, Path path, Configuration conf)
    throws Exception {
  List<String> list = new ArrayList<String>();
  SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
  String next = (String) reader.next((String) null);
  while (next != null) {
    list.add(next);
    next = (String) reader.next((String) null);
  }
  reader.close();
  return list;
}
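// The (String) null idiom above goes through the reader's next(Object) overload, which
// relies on the configured serialization framework for the key class. With ordinary
// Writable keys, the equivalent keys-only scan uses the next(Writable) overload into a
// reusable instance. A minimal sketch, assuming the file's key class is Text:
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

class KeysOnlyReadSketch {
  /** Collects all keys from a sequence file whose key class is assumed to be Text. */
  static List<String> readKeys(Configuration conf, Path path) throws IOException {
    List<String> keys = new ArrayList<>();
    try (SequenceFile.Reader reader =
        new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
      Text key = new Text();
      while (reader.next(key)) { // reads only the key; the value is not deserialized
        keys.add(key.toString());
      }
    }
    return keys;
  }
}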
public static void main(String[] args) throws Exception {
  String mapUri = args[0];
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(URI.create(mapUri), conf);

  Path map = new Path(mapUri);
  Path mapData = new Path(mapUri, MapFile.DATA_FILE_NAME);

  SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(mapData));
  Class keyClass = reader.getKeyClass();
  Class valueClass = reader.getValueClass();
  reader.close();

  long entries = MapFile.fix(fs, map, keyClass, valueClass, false, conf);
  System.out.printf("Created MapFile %s with %d entries\n", map, entries);
}
private int getMessageCount(LogFilePath logFilePath) throws Exception {
  String path = logFilePath.getLogFilePath();
  Path fsPath = new Path(path);
  FileSystem fileSystem = FileUtil.getFileSystem(path);
  SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, fsPath, new Configuration());
  LongWritable key = (LongWritable) reader.getKeyClass().newInstance();
  BytesWritable value = (BytesWritable) reader.getValueClass().newInstance();
  int result = 0;
  while (reader.next(key, value)) {
    result++;
  }
  reader.close();
  return result;
}
/**
 * Produce splits such that each is no greater than the quotient of the total size and the number
 * of splits requested.
 *
 * @param job The handle to the JobConf object
 * @param numSplits Number of splits requested
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  int cnfiles = job.getInt(SRC_COUNT_LABEL, -1);
  long cbsize = job.getLong(TOTAL_SIZE_LABEL, -1);
  String srcfilelist = job.get(SRC_LIST_LABEL, "");
  if (cnfiles < 0 || cbsize < 0 || "".equals(srcfilelist)) {
    throw new RuntimeException(
        "Invalid metadata: #files("
            + cnfiles
            + ") total_size("
            + cbsize
            + ") listuri("
            + srcfilelist
            + ")");
  }
  Path src = new Path(srcfilelist);
  FileSystem fs = src.getFileSystem(job);
  FileStatus srcst = fs.getFileStatus(src);

  ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
  LongWritable key = new LongWritable();
  FilePair value = new FilePair();
  final long targetsize = cbsize / numSplits;
  long pos = 0L;
  long last = 0L;
  long acc = 0L;
  long cbrem = srcst.getLen();
  SequenceFile.Reader sl = null;
  try {
    sl = new SequenceFile.Reader(fs, src, job);
    for (; sl.next(key, value); last = sl.getPosition()) {
      // If adding this file would put the current split past the target size,
      // cut the split here and start this file in the next split.
      if (acc + key.get() > targetsize && acc != 0) {
        long splitsize = last - pos;
        splits.add(new FileSplit(src, pos, splitsize, (String[]) null));
        cbrem -= splitsize;
        pos = last;
        acc = 0L;
      }
      acc += key.get();
    }
  } finally {
    checkAndClose(sl);
  }
  if (cbrem != 0) {
    splits.add(new FileSplit(src, pos, cbrem, (String[]) null));
  }
  return splits.toArray(new FileSplit[splits.size()]);
}
protected Vector fetchVector(Path p, int keyIndex) throws IOException {
  if (!fs.exists(p)) {
    return null;
  }
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
  IntWritable key = new IntWritable();
  VectorWritable vw = new VectorWritable();
  try {
    while (reader.next(key, vw)) {
      if (key.get() == keyIndex) {
        return vw.get();
      }
    }
    return null;
  } finally {
    reader.close(); // close the reader on all return paths
  }
}
private void getOffsets(LogFilePath logFilePath, Set<Long> offsets) throws Exception {
  String path = logFilePath.getLogFilePath();
  Path fsPath = new Path(path);
  FileSystem fileSystem = FileUtil.getFileSystem(path);
  SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, fsPath, new Configuration());
  LongWritable key = (LongWritable) reader.getKeyClass().newInstance();
  BytesWritable value = (BytesWritable) reader.getValueClass().newInstance();
  while (reader.next(key, value)) {
    if (!offsets.add(key.get())) {
      throw new RuntimeException(
          "duplicate key " + key.get() + " found in file " + logFilePath.getLogFilePath());
    }
  }
  reader.close();
}
private static Vector loadVector(Configuration conf, Path path) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
  Writable key = new IntWritable();
  VectorWritable value = new VectorWritable();
  try {
    if (!reader.next(key, value)) {
      throw new IOException("Input vector file is empty.");
    }
    return value.get();
  } finally {
    reader.close();
  }
}
/**
 * Return the progress within the input split.
 *
 * @return 0.0 to 1.0 of the input byte range
 */
public float getProgress() throws IOException, InterruptedException {
  if (end == start) {
    return 0.0f;
  } else {
    return Math.min(1.0f, (float) ((in.getPosition() - start) / (double) (end - start)));
  }
}
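// The division above is done in double precision before clamping, so a read position
// just past the split end still reports at most 1.0. A quick worked check of the same
// arithmetic with invented byte offsets:
class ProgressSketch {
  public static void main(String[] args) {
    long start = 1000, end = 5000, position = 2000; // invented split bounds and position
    double raw = (position - start) / (double) (end - start); // (2000-1000)/4000.0
    float progress = Math.min(1.0f, (float) raw);
    System.out.println(progress); // 0.25
  }
}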
private MedoidSet readMedoidsSet(Path input, Configuration config)
    throws IOException, IllegalAccessException, InstantiationException {
  FileSystem fs = FileSystem.get(config);
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, input, config);
  MedoidSet set = new MedoidSet();
  try {
    Writable key = (Writable) reader.getKeyClass().newInstance();
    reader.next(key, set);
  } finally {
    IOUtils.quietClose(reader);
  }
  log.debug("Read initial medoid set:" + set);
  return set;
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  Path centroids = new Path(conf.get(CENTERS_CONF_KEY));
  FileSystem fs = FileSystem.get(conf);

  SequenceFile.Reader reader = new SequenceFile.Reader(fs, centroids, conf);
  Centroid key = new Centroid();
  IntWritable value = new IntWritable();
  while (reader.next(key, value)) {
    Centroid clusterCenter = new Centroid(key);
    centers.add(clusterCenter);
  }
  reader.close();
}