@Test public void testHadoopBuild() throws Exception { // create test data Map<String, String> values = new HashMap<String, String>(); File testDir = TestUtils.createTempDir(); File tempDir = new File(testDir, "temp"), tempDir2 = new File(testDir, "temp2"); File outputDir = new File(testDir, "output"), outputDir2 = new File(testDir, "output2"); File storeDir = TestUtils.createTempDir(testDir); for (int i = 0; i < 200; i++) values.put(Integer.toString(i), Integer.toBinaryString(i)); // write test data to text file File inputFile = File.createTempFile("input", ".txt", testDir); inputFile.deleteOnExit(); StringBuilder contents = new StringBuilder(); for (Map.Entry<String, String> entry : values.entrySet()) contents.append(entry.getKey() + "\t" + entry.getValue() + "\n"); FileUtils.writeStringToFile(inputFile, contents.toString()); String storeName = "test"; SerializerDefinition serDef = new SerializerDefinition("string"); Cluster cluster = ServerTestUtils.getLocalCluster(1); // Test backwards compatibility StoreDefinition def = new StoreDefinitionBuilder() .setName(storeName) .setType(ReadOnlyStorageConfiguration.TYPE_NAME) .setKeySerializer(serDef) .setValueSerializer(serDef) .setRoutingPolicy(RoutingTier.CLIENT) .setRoutingStrategyType(RoutingStrategyType.CONSISTENT_STRATEGY) .setReplicationFactor(1) .setPreferredReads(1) .setRequiredReads(1) .setPreferredWrites(1) .setRequiredWrites(1) .build(); HadoopStoreBuilder builder = new HadoopStoreBuilder( new Configuration(), TextStoreMapper.class, TextInputFormat.class, cluster, def, 64 * 1024, new Path(tempDir2.getAbsolutePath()), new Path(outputDir2.getAbsolutePath()), new Path(inputFile.getAbsolutePath()), CheckSumType.MD5, saveKeys, false); builder.build(); builder = new HadoopStoreBuilder( new Configuration(), TextStoreMapper.class, TextInputFormat.class, cluster, def, 64 * 1024, new Path(tempDir.getAbsolutePath()), new Path(outputDir.getAbsolutePath()), new Path(inputFile.getAbsolutePath()), CheckSumType.MD5, 
saveKeys, false); builder.build(); // Check if checkSum is generated in outputDir File nodeFile = new File(outputDir, "node-0"); // Check if metadata file exists File metadataFile = new File(nodeFile, ".metadata"); Assert.assertTrue(metadataFile.exists()); ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata(metadataFile); if (saveKeys) Assert.assertEquals( metadata.get(ReadOnlyStorageMetadata.FORMAT), ReadOnlyStorageFormat.READONLY_V2.getCode()); else Assert.assertEquals( metadata.get(ReadOnlyStorageMetadata.FORMAT), ReadOnlyStorageFormat.READONLY_V1.getCode()); Assert.assertEquals( metadata.get(ReadOnlyStorageMetadata.CHECKSUM_TYPE), CheckSum.toString(CheckSumType.MD5)); // Check contents of checkSum file byte[] md5 = Hex.decodeHex(((String) metadata.get(ReadOnlyStorageMetadata.CHECKSUM)).toCharArray()); byte[] checkSumBytes = CheckSumTests.calculateCheckSum(nodeFile.listFiles(), CheckSumType.MD5); Assert.assertEquals(0, ByteUtils.compare(checkSumBytes, md5)); // check if fetching works HdfsFetcher fetcher = new HdfsFetcher(); // Fetch to version directory File versionDir = new File(storeDir, "version-0"); fetcher.fetch(nodeFile.getAbsolutePath(), versionDir.getAbsolutePath()); Assert.assertTrue(versionDir.exists()); // open store @SuppressWarnings("unchecked") Serializer<Object> serializer = (Serializer<Object>) new DefaultSerializerFactory().getSerializer(serDef); ReadOnlyStorageEngine engine = new ReadOnlyStorageEngine( storeName, searchStrategy, new RoutingStrategyFactory().updateRoutingStrategy(def, cluster), 0, storeDir, 1); Store<Object, Object, Object> store = SerializingStore.wrap(engine, serializer, serializer, serializer); // check values for (Map.Entry<String, String> entry : values.entrySet()) { List<Versioned<Object>> found = store.get(entry.getKey(), null); Assert.assertEquals("Incorrect number of results", 1, found.size()); Assert.assertEquals(entry.getValue(), found.get(0).getValue()); } // also check the iterator - first key 
iterator... try { ClosableIterator<ByteArray> keyIterator = engine.keys(); if (!saveKeys) { fail("Should have thrown an exception since this RO format does not support iterators"); } int numElements = 0; while (keyIterator.hasNext()) { Assert.assertTrue(values.containsKey(serializer.toObject(keyIterator.next().get()))); numElements++; } Assert.assertEquals(numElements, values.size()); } catch (UnsupportedOperationException e) { if (saveKeys) { fail("Should not have thrown an exception since this RO format does support iterators"); } } // ... and entry iterator try { ClosableIterator<Pair<ByteArray, Versioned<byte[]>>> entryIterator = engine.entries(); if (!saveKeys) { fail("Should have thrown an exception since this RO format does not support iterators"); } int numElements = 0; while (entryIterator.hasNext()) { Pair<ByteArray, Versioned<byte[]>> entry = entryIterator.next(); Assert.assertEquals( values.get(serializer.toObject(entry.getFirst().get())), serializer.toObject(entry.getSecond().getValue())); numElements++; } Assert.assertEquals(numElements, values.size()); } catch (UnsupportedOperationException e) { if (saveKeys) { fail("Should not have thrown an exception since this RO format does support iterators"); } } }
/**
 * Reads the build settings from {@code props}, runs a {@link VoldemortStoreBuilderJob} with
 * them and returns the output directory that was handed to the job.
 *
 * <p>Properties read (with their defaults):
 * <ul>
 *   <li>{@code build.replication.factor} - 2</li>
 *   <li>{@code build.chunk.size} - 1024 * 1024 * 1024</li>
 *   <li>{@code build.temp.dir} - {@code /tmp/vold-build-and-push-} followed by a random long</li>
 *   <li>{@code build.output.dir} - no default is supplied here</li>
 *   <li>{@code build.key.selection}, {@code build.value.selection} - {@code null}</li>
 *   <li>{@code checksum.type} - the string form of {@code CheckSumType.MD5}</li>
 *   <li>{@code save.keys} - {@code true}</li>
 *   <li>{@code reducer.per.bucket} - {@code false}</li>
 *   <li>{@code num.chunks} - -1</li>
 * </ul>
 *
 * @param props job properties, also passed through to the builder job
 * @param url URL whose host component becomes the last segment of the output directory
 * @return string form of {@code <build.output.dir>/<host of url>}
 * @throws IllegalArgumentException if {@code url} has no host component
 * @throws Exception if {@code url} cannot be parsed or the build job fails
 */
public String runBuildStore(Props props, String url) throws Exception {
    int replicationFactor = props.getInt("build.replication.factor", 2);
    int chunkSize = props.getInt("build.chunk.size", 1024 * 1024 * 1024);
    Path tempDir =
            new Path(
                    props.getString(
                            "build.temp.dir",
                            "/tmp/vold-build-and-push-" + new Random().nextLong()));

    URI uri = new URI(url);
    String outputRoot = props.getString("build.output.dir");
    // URI.getHost() returns null when the URL has no parseable host (for example
    // "localhost:6666", where "localhost" is taken as the scheme). Reject that here with a
    // message naming the URL instead of passing null on to the Path constructor.
    String host = uri.getHost();
    if (host == null) {
        throw new IllegalArgumentException(
                "Cannot determine build output directory: no host component in URL '"
                        + url
                        + "'");
    }
    Path outputDir = new Path(outputRoot, host);

    Path inputPath = getInputPath();
    String keySelection = props.getString("build.key.selection", null);
    String valSelection = props.getString("build.value.selection", null);
    CheckSumType checkSumType =
            CheckSum.fromString(
                    props.getString("checksum.type", CheckSum.toString(CheckSumType.MD5)));
    boolean saveKeys = props.getBoolean("save.keys", true);
    boolean reducerPerBucket = props.getBoolean("reducer.per.bucket", false);
    int numChunks = props.getInt("num.chunks", -1);

    if (isAvroJob) {
        // Avro jobs additionally pass the key/value field names and the three schemas.
        String recSchema = getRecordSchema();
        String keySchema = getKeySchema();
        String valSchema = getValueSchema();
        // NOTE(review): the trailing `true` presumably marks the job as Avro - confirm
        // against the VoldemortStoreBuilderJob constructor.
        new VoldemortStoreBuilderJob(
                        this.getId() + "-build-store",
                        props,
                        new VoldemortStoreBuilderConf(
                                replicationFactor,
                                chunkSize,
                                tempDir,
                                outputDir,
                                inputPath,
                                cluster,
                                storeDefs,
                                storeName,
                                keySelection,
                                valSelection,
                                null,
                                null,
                                checkSumType,
                                saveKeys,
                                reducerPerBucket,
                                numChunks,
                                keyField,
                                valueField,
                                recSchema,
                                keySchema,
                                valSchema),
                        true)
                .run();
    } else {
        new VoldemortStoreBuilderJob(
                        this.getId() + "-build-store",
                        props,
                        new VoldemortStoreBuilderConf(
                                replicationFactor,
                                chunkSize,
                                tempDir,
                                outputDir,
                                inputPath,
                                cluster,
                                storeDefs,
                                storeName,
                                keySelection,
                                valSelection,
                                null,
                                null,
                                checkSumType,
                                saveKeys,
                                reducerPerBucket,
                                numChunks))
                .run();
    }
    return outputDir.toString();
}