/**
 * Builds a dictionary-encoded block of keys in which the given key list is repeated,
 * in order, {@code positionCount} times.
 *
 * @param positionCount number of times the full key list is repeated
 * @param keys the distinct keys that form the dictionary
 * @return a {@code DictionaryBlock} with {@code positionCount * keys.size()} positions
 */
private static Block createKeyBlock(int positionCount, List<String> keys)
{
    Block dictionary = createSliceArrayBlock(keys);

    int keyCount = keys.size();
    int totalEntries = positionCount * keyCount;

    // ids cycle through 0 .. keyCount - 1 so every run of keyCount entries lists all keys once
    int[] ids = new int[totalEntries];
    int nextId = 0;
    for (int entry = 0; entry < totalEntries; entry++) {
        ids[entry] = nextId;
        nextId++;
        if (nextId == keyCount) {
            nextId = 0;
        }
    }

    return new DictionaryBlock(totalEntries, dictionary, Slices.wrappedIntArray(ids));
}
/**
 * Assembles a map-shaped block: keys and values are interleaved into a single block,
 * which is then wrapped in an {@code ArrayBlock} holding one entry per position.
 *
 * <p>Every position receives the same number of key/value pairs,
 * {@code keyBlock.getPositionCount() / positionCount}.
 *
 * @param positionCount number of map entries to expose; must be non-zero (it is used as a divisor)
 * @param keyBlock block holding the keys of all maps, back to back
 * @param valueBlock block holding the values of all maps, back to back
 * @return an {@code ArrayBlock} over the interleaved key/value block
 */
private static Block createMapBlock(int positionCount, Block keyBlock, Block valueBlock)
{
    InterleavedBlock interleavedBlock = new InterleavedBlock(new Block[] {keyBlock, valueBlock});

    int[] offsets = new int[positionCount];
    int mapSize = keyBlock.getPositionCount() / positionCount;
    // each map occupies one key slot and one value slot per pair in the interleaved block
    int entriesPerMap = mapSize * 2;

    // NOTE(review): offsets are written as the exclusive END offset of each position
    // (position 0 implicitly starts at 0), which is how other callers of this ArrayBlock
    // constructor populate them. Writing the start offset instead would leave the first
    // map empty and drop the last map's entries. Confirm against AbstractArrayBlock.
    for (int i = 0; i < positionCount; i++) {
        offsets[i] = entriesPerMap * (i + 1);
    }

    // zero-filled null flags: presumably marks every position as non-null
    return new ArrayBlock(
            interleavedBlock,
            Slices.wrappedIntArray(offsets),
            0,
            Slices.allocate(positionCount));
}
/**
 * Builds a dictionary-encoded block of random string values.
 *
 * <p>The dictionary holds {@code (int) (positionCount * mapSize * 0.82)} random words,
 * each requested with a length drawn from [5, 10). Every one of the
 * {@code positionCount * mapSize} positions then points at a uniformly chosen
 * dictionary entry.
 *
 * @param positionCount number of map positions the values are generated for
 * @param mapSize number of values per position
 * @return a {@code DictionaryBlock} with {@code positionCount * mapSize} positions
 */
private static Block createDictionaryValueBlock(int positionCount, int mapSize)
{
    double distinctRatio = 0.82;
    int entryCount = positionCount * mapSize;
    int dictionarySize = (int) (entryCount * distinctRatio);

    ThreadLocalRandom random = ThreadLocalRandom.current();

    // generate the distinct words that make up the dictionary
    List<String> words = new ArrayList<>(dictionarySize);
    while (words.size() < dictionarySize) {
        int wordLength = random.nextInt(5, 10);
        words.add(randomString(wordLength));
    }
    Block dictionary = createSliceArrayBlock(words);

    // point every position at a random dictionary entry
    int[] ids = new int[entryCount];
    for (int entry = 0; entry < ids.length; entry++) {
        ids[entry] = random.nextInt(0, dictionarySize);
    }

    return new DictionaryBlock(entryCount, dictionary, Slices.wrappedIntArray(ids));
}
@Override public Block readBlock(Type type) throws IOException { if (!rowGroupOpen) { openRowGroup(); } if (readOffset > 0) { if (presentStream != null) { // skip ahead the present bit reader, but count the set bits // and use this as the skip size for the field readers readOffset = presentStream.countBitsSet(readOffset); } for (StreamReader structField : structFields) { structField.prepareNextRead(readOffset); } } List<Type> typeParameters = type.getTypeParameters(); boolean[] nullVector = new boolean[nextBatchSize]; Block[] blocks = new Block[typeParameters.size()]; if (presentStream == null) { for (int i = 0; i < typeParameters.size(); i++) { StreamReader structField = structFields[i]; structField.prepareNextRead(nextBatchSize); blocks[i] = structField.readBlock(typeParameters.get(i)); } } else { int nullValues = presentStream.getUnsetBits(nextBatchSize, nullVector); if (nullValues != nextBatchSize) { for (int i = 0; i < typeParameters.size(); i++) { StreamReader structField = structFields[i]; structField.prepareNextRead(nextBatchSize - nullValues); blocks[i] = structField.readBlock(typeParameters.get(i)); } } else { for (int i = 0; i < typeParameters.size(); i++) { blocks[i] = typeParameters.get(i).createBlockBuilder(new BlockBuilderStatus(), 0).build(); } } } // Build offsets for array block (null valued have no positions) int[] offsets = new int[nextBatchSize]; offsets[0] = (nullVector[0] ? 0 : typeParameters.size()); for (int i = 1; i < nextBatchSize; i++) { offsets[i] = offsets[i - 1] + (nullVector[i] ? 0 : typeParameters.size()); } // Struct is represented as an array block holding an interleaved block InterleavedBlock interleavedBlock = new InterleavedBlock(blocks); ArrayBlock arrayBlock = new ArrayBlock( interleavedBlock, Slices.wrappedIntArray(offsets), 0, Slices.wrappedBooleanArray(nullVector)); readOffset = 0; nextBatchSize = 0; return arrayBlock; }