public void verifySchema(String url) throws Exception { // create new json store def with schema from the metadata in the input // path JsonSchema schema = HadoopUtils.getSchemaFromPath(getInputPath()); int replicationFactor = props.getInt("build.replication.factor", 2); int requiredReads = props.getInt("build.required.reads", 1); int requiredWrites = props.getInt("build.required.writes", 1); String description = props.getString("push.store.description", ""); String owners = props.getString("push.store.owners", ""); String keySchema = "\n\t\t<type>json</type>\n\t\t<schema-info version=\"0\">" + schema.getKeyType() + "</schema-info>\n\t"; String valSchema = "\n\t\t<type>json</type>\n\t\t<schema-info version=\"0\">" + schema.getValueType() + "</schema-info>\n\t"; boolean hasCompression = false; if (props.containsKey("build.compress.value")) hasCompression = true; if (hasCompression) { valSchema += "\t<compression><type>gzip</type></compression>\n\t"; } if (props.containsKey("build.force.schema.key")) { keySchema = props.get("build.force.schema.key"); } if (props.containsKey("build.force.schema.value")) { valSchema = props.get("build.force.schema.value"); } String newStoreDefXml = VoldemortUtils.getStoreDefXml( storeName, replicationFactor, requiredReads, requiredWrites, props.containsKey("build.preferred.reads") ? props.getInt("build.preferred.reads") : null, props.containsKey("build.preferred.writes") ? props.getInt("build.preferred.writes") : null, (props.containsKey("push.force.schema.key")) ? props.getString("push.force.schema.key") : keySchema, (props.containsKey("push.force.schema.value")) ? 
props.getString("push.force.schema.value") : valSchema, description, owners); log.info("Verifying store: \n" + newStoreDefXml.toString()); StoreDefinition newStoreDef = VoldemortUtils.getStoreDef(newStoreDefXml); // get store def from cluster log.info("Getting store definition from: " + url + " (node id " + this.nodeId + ")"); AdminClient adminClient = new AdminClient(url, new AdminClientConfig()); try { List<StoreDefinition> remoteStoreDefs = adminClient.getRemoteStoreDefList(this.nodeId).getValue(); boolean foundStore = false; // go over all store defs and see if one has the same name as the // store we're trying // to build for (StoreDefinition remoteStoreDef : remoteStoreDefs) { if (remoteStoreDef.getName().equals(storeName)) { // if the store already exists, but doesn't match what we // want to push, we need // to worry if (!remoteStoreDef.equals(newStoreDef)) { // it is possible that the stores actually DO match, but // the // json in the key/value serializers is out of order (eg // {'a': 'int32', 'b': 'int32'} could have a/b reversed. // this is just a reflection of the fact that voldemort // json // type defs use hashmaps that are unordered, and pig // uses // bags that are unordered as well. it's therefore // unpredictable what order the keys will come out of // pig. // let's check to see if the key/value serializers are // REALLY equal. 
SerializerDefinition localKeySerializerDef = newStoreDef.getKeySerializer(); SerializerDefinition localValueSerializerDef = newStoreDef.getValueSerializer(); SerializerDefinition remoteKeySerializerDef = remoteStoreDef.getKeySerializer(); SerializerDefinition remoteValueSerializerDef = remoteStoreDef.getValueSerializer(); if (remoteKeySerializerDef.getName().equals("json") && remoteValueSerializerDef.getName().equals("json") && remoteKeySerializerDef.getAllSchemaInfoVersions().size() == 1 && remoteValueSerializerDef.getAllSchemaInfoVersions().size() == 1) { JsonTypeDefinition remoteKeyDef = JsonTypeDefinition.fromJson(remoteKeySerializerDef.getCurrentSchemaInfo()); JsonTypeDefinition remoteValDef = JsonTypeDefinition.fromJson(remoteValueSerializerDef.getCurrentSchemaInfo()); JsonTypeDefinition localKeyDef = JsonTypeDefinition.fromJson(localKeySerializerDef.getCurrentSchemaInfo()); JsonTypeDefinition localValDef = JsonTypeDefinition.fromJson(localValueSerializerDef.getCurrentSchemaInfo()); if (remoteKeyDef.equals(localKeyDef) && remoteValDef.equals(localValDef)) { String compressionPolicy = ""; if (hasCompression) { compressionPolicy = "\n\t\t<compression><type>gzip</type></compression>"; } // if the key/value serializers are REALLY equal // (even though the strings may not match), then // just use the remote stores to GUARANTEE that // they // match, and try again. newStoreDefXml = VoldemortUtils.getStoreDefXml( storeName, replicationFactor, requiredReads, requiredWrites, props.containsKey("build.preferred.reads") ? props.getInt("build.preferred.reads") : null, props.containsKey("build.preferred.writes") ? 
props.getInt("build.preferred.writes") : null, "\n\t\t<type>json</type>\n\t\t<schema-info version=\"0\">" + remoteKeySerializerDef.getCurrentSchemaInfo() + "</schema-info>\n\t", "\n\t\t<type>json</type>\n\t\t<schema-info version=\"0\">" + remoteValueSerializerDef.getCurrentSchemaInfo() + "</schema-info>" + compressionPolicy + "\n\t"); newStoreDef = VoldemortUtils.getStoreDef(newStoreDefXml); if (!remoteStoreDef.equals(newStoreDef)) { // if we still get a fail, then we know that // the // store defs don't match for reasons OTHER // than // the key/value serializer throw new RuntimeException( "Your store schema is identical, but the store definition does not match. Have: " + newStoreDef + "\nBut expected: " + remoteStoreDef); } } else { // if the key/value serializers are not equal // (even // in java, not just json strings), then fail throw new RuntimeException( "Your store definition does not match the store definition that is already in the cluster. Tried to resolve identical schemas between local and remote, but failed. Have: " + newStoreDef + "\nBut expected: " + remoteStoreDef); } } } foundStore = true; break; } } // if the store doesn't exist yet, create it if (!foundStore) { // New requirement - Make sure the user had description and // owner specified if (description.length() == 0) { throw new RuntimeException( "Description field missing in store definition. " + "Please add \"push.store.description\" with a line describing your store"); } if (owners.length() == 0) { throw new RuntimeException( "Owner field missing in store definition. " + "Please add \"push.store.owners\" with value being comma-separated list of LinkedIn email ids"); } log.info("Could not find store " + storeName + " on Voldemort. Adding it to all nodes "); adminClient.addStore(newStoreDef); } // don't use newStoreDef because we want to ALWAYS use the JSON // definition since the store builder assumes that you are using // JsonTypeSerializer. 
This allows you to tweak your value/key store // xml // as you see fit, but still uses the json sequence file meta data // to // build the store. storeDefs = ImmutableList.of( VoldemortUtils.getStoreDef( VoldemortUtils.getStoreDefXml( storeName, replicationFactor, requiredReads, requiredWrites, props.containsKey("build.preferred.reads") ? props.getInt("build.preferred.reads") : null, props.containsKey("build.preferred.writes") ? props.getInt("build.preferred.writes") : null, keySchema, valSchema))); cluster = adminClient.getAdminClientCluster(); } finally { adminClient.stop(); } }
/**
 * Returns the sanitized form of the path configured under
 * {@code build.input.path}. At the moment of writing, sanitizing means the
 * #LATEST tag is expanded.
 *
 * @return the configured input path after sanitization
 * @throws IOException if sanitizing the path fails
 */
private Path getInputPath() throws IOException {
    String configuredInputPath = props.getString("build.input.path");
    Path rawInputPath = new Path(configuredInputPath);
    return HadoopUtils.getSanitizedPath(rawInputPath);
}
@Override public void run() throws Exception { boolean build = props.getBoolean("build", true); boolean push = props.getBoolean("push", true); if (build && push && dataDirs.size() != 1) { // Should have only one data directory ( which acts like the parent // directory to all // urls ) throw new RuntimeException( " Should have only one data directory ( which acts like root directory ) since they are auto-generated during build phase "); } else if (!build && push && dataDirs.size() != clusterUrl.size()) { // Number of data directories should be equal to number of cluster // urls throw new RuntimeException( " Since we are only pushing, number of data directories ( comma separated ) should be equal to number of cluster urls "); } // Check every url individually HashMap<String, Exception> exceptions = Maps.newHashMap(); for (int index = 0; index < clusterUrl.size(); index++) { String url = clusterUrl.get(index); log.info("Working on " + url); try { if (isAvroJob) verifyAvroSchema(url); else verifySchema(url); String buildOutputDir; if (build) { buildOutputDir = runBuildStore(props, url); } else { buildOutputDir = dataDirs.get(index); } if (push) { if (log.isDebugEnabled()) log.debug("Informing about push start ..."); informedResults.add( this.informedExecutor.submit( new InformedClient(this.props, "Running", this.getId()))); runPushStore(props, url, buildOutputDir); } if (build && push && !props.getBoolean("build.output.keep", false)) { JobConf jobConf = new JobConf(); if (props.containsKey("hadoop.job.ugi")) { jobConf.set("hadoop.job.ugi", props.getString("hadoop.job.ugi")); } log.info("Deleting " + buildOutputDir); HadoopUtils.deletePathIfExists(jobConf, buildOutputDir); log.info("Deleted " + buildOutputDir); } if (log.isDebugEnabled()) log.debug("Informing about push finish ..."); informedResults.add( this.informedExecutor.submit(new InformedClient(this.props, "Finished", this.getId()))); for (Future result : informedResults) { try { result.get(); } catch (Exception 
e) { this.log.error("Exception in consumer", e); } } this.informedExecutor.shutdownNow(); } catch (Exception e) { log.error("Exception during build and push for url " + url, e); exceptions.put(url, e); } } if (exceptions.size() > 0) { log.error( "Got exceptions while pushing to " + Joiner.on(",").join(exceptions.keySet()) + " => " + Joiner.on(",").join(exceptions.values())); System.exit(-1); } }