/** * Build the index on the given file. * * @param file , RegionDataFile, should be sorted on key in ascend order * @param blockSize * @param blockCount the approximate blocks in each index entry.This factor is useful to balance * the key/values in each index entry,and also have a influence on how many blocks the system * load when the system try to load data from file system. * @return * @throws IOException */ public static int build( List<IndexEntry> list, BloomFilter filter, String file, int blockSize, int blockCount) throws IOException { if (filter != null) { filter.clear(); } IBlockInputStream in = new KVInputStream(DFSManager.getDFS().open(new Path(file)), blockSize, 0, 0); int keyNum = 0; byte[] prevKey = null, curKey = null; int prevBlock = 0, curBlock = 0; int offset = 0; KeyValue kv = null; try { while (true) { int len = in.readInt(); if (len == 0) { // prevKey == null means the index entry has been flushed // before if (prevKey != null) { list.add(new IndexEntry(prevKey, curKey, prevBlock, curBlock, offset)); } in.close(); break; } else { in.skipBytes(-4); } kv = KeyValueIOUtil.readFromExternal(in); keyNum++; curKey = kv.getKey(); if (filter != null) { filter.set(curKey); } if (prevKey == null) { prevKey = curKey; } curBlock = in.getCurrentBlock(); int count = curBlock - prevBlock; if (count >= blockCount || (count == blockCount - 1 && in.getBlockAvailable() < 4)) { list.add(new IndexEntry(prevKey, curKey, prevBlock, curBlock, offset)); offset = in.getBlockPos() % blockSize; prevBlock = curBlock; if (count == blockCount - 1) { prevBlock++; offset = 0; } prevKey = null; } } } catch (EOFException e) { if (prevKey != null && curKey != null) { list.add(new IndexEntry(prevKey, curKey, prevBlock, curBlock, offset)); } in.close(); } return keyNum; }