/**
 * Creates a job descriptor from the job's properties.
 *
 * <p>A resolved copy of {@code props} (via {@code PropsUtils.resolveProps}) is kept as the
 * descriptor's properties; the individual settings below are read from the {@code props}
 * argument itself.
 *
 * @param id the job id
 * @param conicalPath stored as the descriptor's path
 * @param fullpath stored as the descriptor's full path
 * @param props the job properties
 * @param classLoader the class loader kept for this job
 */
public JobDescriptor(
    String id, String conicalPath, String fullpath, Props props, ClassLoader classLoader) {
  // Identity and plain references first.
  _id = id;
  _path = conicalPath;
  _fullpath = fullpath;
  _classLoader = classLoader;
  _dependencies = new HashSet<JobDescriptor>();

  _props = PropsUtils.resolveProps(props);

  // TODO: move the job-class validation (loading JOB_CLASS for "java" jobs) into the Java job.
  _jobType = props.getString(JOB_TYPE, "");

  // Scheduling and locking settings.
  _readResourceLocks = props.getStringList(READ_LOCKS, ",");
  _retries = props.getInt(RETRIES, 0);
  _retryBackoffMs = props.getLong(RETRY_BACKOFF, 0);
  _requiredPermits = props.getInt(JOB_PERMITS, 0);
  _writeResourceLocks = props.getStringList(WRITE_LOCKS, ",");
  _sourceEmailList = props.getString("mail.sender", null);

  // Ordered resource locking should help prevent simple deadlocking situations.
  Collections.sort(_readResourceLocks);
  Collections.sort(_writeResourceLocks);

  _emailList = props.getStringList(NOTIFY_EMAIL);
}
/**
 * Constructs a group whose first child reports FAILED and whose second child reports RUNNING,
 * then checks the group's initial status, start time, end time, exceptions and parent props.
 *
 * <p>The expected start time is the one returned by {@code mockFlow2} and the expected end time
 * is the one returned by {@code mockFlow1}.
 */
@Test
public void testInitializationFirstFailedSecondRunning() throws Exception {
  final DateTime groupStart = new DateTime(0);
  final DateTime groupEnd = new DateTime(100);

  // Expectations are recorded in the same order as before; only local names changed.
  EasyMock.expect(mockFlow1.getName()).andReturn("a").once();
  EasyMock.expect(mockFlow2.getName()).andReturn("b").once();

  EasyMock.expect(mockFlow1.getStatus()).andReturn(Status.FAILED).times(2);
  EasyMock.expect(mockFlow2.getStatus()).andReturn(Status.RUNNING).once();

  // mockFlow1 starts later than mockFlow2, so its start time must not be the group's.
  EasyMock.expect(mockFlow1.getStartTime()).andReturn(new DateTime(1)).once();
  EasyMock.expect(mockFlow1.getEndTime()).andReturn(groupEnd).once();
  EasyMock.expect(mockFlow2.getStartTime()).andReturn(groupStart).once();
  EasyMock.expect(mockFlow2.getEndTime()).andReturn(null).once();

  EasyMock.expect(mockFlow1.getParentProps()).andReturn(props).once();
  EasyMock.expect(mockFlow2.getParentProps()).andReturn(props).once();
  EasyMock.expect(props.equalsProps(props)).andReturn(true).once();

  EasyMock.expect(mockFlow1.getName()).andReturn("1").once();
  EasyMock.expect(mockFlow2.getName()).andReturn("2").once();

  EasyMock.replay(mockFlow1, mockFlow2, props);

  flow = new GroupedExecutableFlow("blah", mockFlow1, mockFlow2);

  Assert.assertEquals(Status.FAILED, flow.getStatus());
  Assert.assertEquals(groupStart, flow.getStartTime());
  Assert.assertEquals(groupEnd, flow.getEndTime());
  Assert.assertEquals(emptyExceptions, flow.getExceptions());
  Assert.assertEquals(props, flow.getParentProps());
}
public void runPushStore(Props props, String url, String dataDir) throws Exception { // For backwards compatibility http timeout = admin timeout int httpTimeoutMs = 1000 * props.getInt("push.http.timeout.seconds", 24 * 60 * 60); long pushVersion = props.getLong("push.version", -1L); if (props.containsKey("push.version.timestamp")) { DateFormat format = new SimpleDateFormat("yyyyMMddHHmmss"); pushVersion = Long.parseLong(format.format(new Date())); } int maxBackoffDelayMs = 1000 * props.getInt("push.backoff.delay.seconds", 60); boolean rollback = props.getBoolean("push.rollback", true); new VoldemortSwapJob( this.getId() + "-push-store", props, new VoldemortSwapConf( cluster, dataDir, storeName, httpTimeoutMs, pushVersion, maxBackoffDelayMs, rollback)) .run(); }
/**
 * Creates a build-and-push job configured from {@code props}.
 *
 * <p>Required properties: {@code push.store.name}, {@code push.cluster} (comma-separated cluster
 * urls) and {@code build.output.dir} (comma-separated data directories). When
 * {@code build.type.avro} is true, {@code avro.key.field} and {@code avro.value.field} are
 * required as well.
 *
 * @param name the job name, also used as the logger name
 * @param props the job properties
 * @throws RuntimeException if no cluster url or no data directory is configured, or if an Avro
 *     job is missing its key or value field
 */
public VoldemortBuildAndPushJob(String name, Props props) {
  super(name);
  this.props = props;
  this.storeName = props.getString("push.store.name").trim();
  this.clusterUrl = new ArrayList<String>();
  this.dataDirs = new ArrayList<String>();

  addTrimmedEntries(props.getString("push.cluster"), this.clusterUrl);
  if (this.clusterUrl.isEmpty()) {
    throw new RuntimeException("Number of urls should be atleast 1");
  }

  // Support multiple output dirs if the user mentions only "push", no "build".
  // If user mentions both then should have only one
  addTrimmedEntries(props.getString("build.output.dir"), this.dataDirs);
  if (this.dataDirs.isEmpty()) {
    throw new RuntimeException("Number of data dirs should be atleast 1");
  }

  this.nodeId = props.getInt("push.node", 0);
  this.log = Logger.getLogger(name);
  this.informedResults = Lists.newArrayList();
  this.informedExecutor = Executors.newFixedThreadPool(2);

  isAvroJob = props.getBoolean("build.type.avro", false);
  keyField = props.getString("avro.key.field", null);
  valueField = props.getString("avro.value.field", null);
  if (isAvroJob) {
    if (keyField == null) {
      throw new RuntimeException(
          "The key field must be specified in the properties for the Avro build and push job!");
    }
    if (valueField == null) {
      throw new RuntimeException(
          "The value field must be specified in the properties for the Avro build and push job!");
    }
  }
}

/**
 * Splits {@code commaSeparated} with {@code Utils.COMMA_SEP} and appends every non-blank entry to
 * {@code target}. Entries are stored trimmed, so the value that was validated is the value that
 * is kept.
 */
private static void addTrimmedEntries(String commaSeparated, List<String> target) {
  for (String entry : Utils.COMMA_SEP.split(commaSeparated.trim())) {
    String trimmed = entry.trim();
    if (trimmed.length() > 0) {
      target.add(trimmed);
    }
  }
}
/**
 * Constructs a group from two children that both report SUCCEEDED and checks the group's initial
 * status, start time, end time, exceptions and parent props.
 *
 * <p>{@code mockFlow2} supplies the expected start time and {@code mockFlow1} the expected end
 * time; the other two timestamps are decoys that must not be picked.
 */
@Test
public void testInitializationBothSucceeded4() throws Exception {
  final DateTime groupStart = new DateTime(0);
  final DateTime groupEnd = new DateTime(100);

  // Expectations are recorded in the same order as before; only local names changed.
  EasyMock.expect(mockFlow1.getName()).andReturn("a").once();
  EasyMock.expect(mockFlow2.getName()).andReturn("b").once();

  EasyMock.expect(mockFlow1.getStatus()).andReturn(Status.SUCCEEDED).times(2);
  EasyMock.expect(mockFlow2.getStatus()).andReturn(Status.SUCCEEDED).times(2);

  EasyMock.expect(mockFlow1.getStartTime()).andReturn(new DateTime(1)).once();
  EasyMock.expect(mockFlow1.getEndTime()).andReturn(groupEnd).once();
  EasyMock.expect(mockFlow2.getStartTime()).andReturn(groupStart).once();
  EasyMock.expect(mockFlow2.getEndTime()).andReturn(new DateTime(99)).once();

  EasyMock.expect(mockFlow1.getParentProps()).andReturn(props).once();
  EasyMock.expect(mockFlow2.getParentProps()).andReturn(props).once();
  EasyMock.expect(props.equalsProps(props)).andReturn(true).once();

  EasyMock.expect(mockFlow1.getName()).andReturn("1").once();
  EasyMock.expect(mockFlow2.getName()).andReturn("2").once();

  EasyMock.expect(mockFlow1.getReturnProps()).andReturn(new Props()).once();
  EasyMock.expect(mockFlow2.getReturnProps()).andReturn(new Props()).once();

  EasyMock.replay(mockFlow1, mockFlow2, props);

  flow = new GroupedExecutableFlow("blah", mockFlow1, mockFlow2);

  Assert.assertEquals(Status.SUCCEEDED, flow.getStatus());
  Assert.assertEquals(groupStart, flow.getStartTime());
  Assert.assertEquals(groupEnd, flow.getEndTime());
  Assert.assertEquals(emptyExceptions, flow.getExceptions());
  Assert.assertEquals(props, flow.getParentProps());
}
/**
 * Constructs a group from two children that both report RUNNING and follows it until both
 * children have completed.
 *
 * <p>The test runs in three record/replay phases:
 *
 * <ol>
 *   <li>Construction: the group must be RUNNING, start at the time returned by
 *       {@code mockFlow2}, and have no end time. The verify at the end of this phase requires
 *       that {@code execute} was invoked on both children.
 *   <li>First completion: both children must have been handed the same callback instance. After
 *       it is completed once (children now reporting RUNNING and SUCCEEDED) the group is still
 *       RUNNING with no end time.
 *   <li>Second completion: with both children reporting SUCCEEDED the group becomes SUCCEEDED
 *       and its end time is not before the instant captured just ahead of the callback.
 * </ol>
 */
@Test
public void testInitializationBothRunning() throws Exception {
  DateTime expectedStartTime = new DateTime(0);
  DateTime falseStartTime = new DateTime(1);

  // ---- Phase 1: expectations consumed while the group is constructed and inspected ----
  EasyMock.expect(mockFlow1.getName()).andReturn("a").once();
  EasyMock.expect(mockFlow2.getName()).andReturn("b").once();

  EasyMock.expect(mockFlow1.getStatus()).andReturn(Status.RUNNING).times(3);
  EasyMock.expect(mockFlow2.getStatus()).andReturn(Status.RUNNING).times(3);

  EasyMock.expect(mockFlow1.getStartTime()).andReturn(falseStartTime).once();
  EasyMock.expect(mockFlow2.getStartTime()).andReturn(expectedStartTime).once();

  // Capture the callbacks the group passes to its children so the test can drive completion.
  Capture<FlowCallback> callbackCapture1 = new Capture<FlowCallback>();
  Capture<FlowCallback> callbackCapture2 = new Capture<FlowCallback>();
  mockFlow1.execute(EasyMock.eq(props), EasyMock.capture(callbackCapture1));
  mockFlow2.execute(EasyMock.eq(props), EasyMock.capture(callbackCapture2));

  EasyMock.expect(mockFlow1.getParentProps()).andReturn(props).once();
  EasyMock.expect(mockFlow2.getParentProps()).andReturn(props).once();
  EasyMock.expect(props.equalsProps(props)).andReturn(true).once();

  EasyMock.expect(mockFlow1.getName()).andReturn("1").once();
  EasyMock.expect(mockFlow2.getName()).andReturn("2").once();

  EasyMock.replay(mockFlow1, mockFlow2, props);

  flow = new GroupedExecutableFlow("blah", mockFlow1, mockFlow2);

  Assert.assertEquals(Status.RUNNING, flow.getStatus());
  Assert.assertEquals(expectedStartTime, flow.getStartTime());
  Assert.assertEquals(null, flow.getEndTime());

  EasyMock.verify(mockFlow1, mockFlow2, props);
  EasyMock.reset(mockFlow1, mockFlow2, props);

  // ---- Phase 2: one completion notification while mockFlow1 still reports RUNNING ----
  EasyMock.expect(mockFlow1.getStatus()).andReturn(Status.RUNNING).once();
  EasyMock.expect(mockFlow2.getStatus()).andReturn(Status.SUCCEEDED).once();

  EasyMock.replay(mockFlow1, mockFlow2, props);

  // Both children must have been given the very same callback object.
  Assert.assertSame(callbackCapture1.getValue(), callbackCapture2.getValue());

  callbackCapture1.getValue().completed(Status.SUCCEEDED);

  Assert.assertEquals(Status.RUNNING, flow.getStatus());
  Assert.assertEquals(expectedStartTime, flow.getStartTime());
  Assert.assertEquals(null, flow.getEndTime());
  Assert.assertEquals(emptyExceptions, flow.getExceptions());
  Assert.assertEquals(props, flow.getParentProps());

  EasyMock.verify(mockFlow1, mockFlow2, props);
  EasyMock.reset(mockFlow1, mockFlow2, props);

  // ---- Phase 3: second completion notification, both children now SUCCEEDED ----
  EasyMock.expect(mockFlow1.getStatus()).andReturn(Status.SUCCEEDED).once();
  EasyMock.expect(mockFlow2.getStatus()).andReturn(Status.SUCCEEDED).once();
  EasyMock.expect(mockFlow1.getReturnProps()).andReturn(new Props()).once();
  EasyMock.expect(mockFlow2.getReturnProps()).andReturn(new Props()).once();

  EasyMock.replay(mockFlow1, mockFlow2, props);

  // Reference instant: the group's end time must not be earlier than this.
  DateTime beforeTheEnd = new DateTime();
  callbackCapture2.getValue().completed(Status.SUCCEEDED);

  Assert.assertEquals(Status.SUCCEEDED, flow.getStatus());
  Assert.assertEquals(expectedStartTime, flow.getStartTime());
  Assert.assertFalse(
      String.format(
          "flow's end time[%s] should be after beforeTheEnd[%s]", flow.getEndTime(), beforeTheEnd),
      beforeTheEnd.isAfter(flow.getEndTime()));
  Assert.assertEquals(emptyExceptions, flow.getExceptions());
}
/**
 * Executes a two-child group where both children succeed and checks the observable results.
 *
 * <p>Each child's {@code execute} is answered by immediately completing the captured callback
 * with SUCCEEDED. The test then checks that:
 *
 * <ul>
 *   <li>the caller's callback ran and received SUCCEEDED after both children completed;
 *   <li>executing the already-completed flow again runs the callback again while the completion
 *       counter stays at 2;
 *   <li>the return props contain three keys, with key "2" holding {@code mockFlow2}'s value;
 *   <li>executing with props for which {@code equalsProps} returns false throws
 *       {@link IllegalArgumentException}.
 * </ul>
 */
@Test
public void testSanity() throws Exception {
  // Counts child executions; also used to assert the order in which the children run.
  final AtomicLong numJobsComplete = new AtomicLong(0);

  // ---- Set up mockFlow1: runs first, group stays RUNNING after it completes ----
  final Capture<FlowCallback> flow1Callback = new Capture<FlowCallback>();
  mockFlow1.execute(EasyMock.eq(props), EasyMock.capture(flow1Callback));
  EasyMock.expectLastCall()
      .andAnswer(
          new IAnswer<Void>() {
            @Override
            public Void answer() throws Throwable {
              Assert.assertEquals(Status.RUNNING, flow.getStatus());
              Assert.assertEquals(1, numJobsComplete.incrementAndGet());

              flow1Callback.getValue().completed(Status.SUCCEEDED);

              Assert.assertEquals(Status.RUNNING, flow.getStatus());

              return null;
            }
          })
      .once();

  Props mockFlow1Props = new Props();
  mockFlow1Props.put("1", "1");
  mockFlow1Props.put("2", "1");

  EasyMock.expect(mockFlow1.getStatus()).andReturn(Status.SUCCEEDED).times(2);
  EasyMock.expect(mockFlow1.getReturnProps()).andReturn(mockFlow1Props).once();

  // ---- Set up mockFlow2: runs second, group is SUCCEEDED once it completes ----
  final Capture<FlowCallback> flow2Callback = new Capture<FlowCallback>();
  mockFlow2.execute(EasyMock.eq(props), EasyMock.capture(flow2Callback));
  EasyMock.expectLastCall()
      .andAnswer(
          new IAnswer<Void>() {
            @Override
            public Void answer() throws Throwable {
              Assert.assertEquals(Status.RUNNING, flow.getStatus());
              Assert.assertEquals(2, numJobsComplete.incrementAndGet());

              flow2Callback.getValue().completed(Status.SUCCEEDED);

              Assert.assertEquals(Status.SUCCEEDED, flow.getStatus());

              return null;
            }
          })
      .once();

  // mockFlow2 reports READY on the first status query and SUCCEEDED on the second.
  EasyMock.expect(mockFlow2.getStatus())
      .andAnswer(
          new IAnswer<Status>() {
            private volatile AtomicInteger count = new AtomicInteger(0);

            @Override
            public Status answer() throws Throwable {
              switch (count.getAndIncrement()) {
                case 0:
                  return Status.READY;
                case 1:
                  return Status.SUCCEEDED;
                default:
                  Assert.fail("mockFlow2.getStatus() should only be called 2 times.");
              }
              return null;
            }
          })
      .times(2);

  // Key "2" overlaps with mockFlow1's return props; the assertions below expect this value.
  Props mockFlow2Props = new Props();
  mockFlow2Props.put("2", "2");
  mockFlow2Props.put("3", "2");
  EasyMock.expect(mockFlow2.getReturnProps()).andReturn(mockFlow2Props).once();

  EasyMock.expect(props.equalsProps(props)).andReturn(true).once();

  EasyMock.replay(mockFlow1, mockFlow2, props);

  // ---- Start the test ----
  AtomicBoolean callbackRan = new AtomicBoolean(false);
  flow.execute(
      props,
      new OneCallFlowCallback(callbackRan) {
        @Override
        public void theCallback(Status status) {
          Assert.assertEquals(Status.SUCCEEDED, status);
          Assert.assertEquals(2, numJobsComplete.get());
        }
      });

  Assert.assertTrue("Callback wasn't run.", callbackRan.get());
  Assert.assertEquals(Status.SUCCEEDED, flow.getStatus());
  Assert.assertEquals(emptyExceptions, flow.getExceptions());
  Assert.assertEquals(props, flow.getParentProps());

  // Executing the completed flow again must still invoke the callback; the counter stays at 2.
  callbackRan = new AtomicBoolean(false);
  flow.execute(
      props,
      new OneCallFlowCallback(callbackRan) {
        @Override
        protected void theCallback(Status status) {
          Assert.assertEquals(Status.SUCCEEDED, status);
          Assert.assertEquals(2, numJobsComplete.get());
        }
      });

  Assert.assertTrue("Callback wasn't run.", callbackRan.get());
  Assert.assertEquals(Status.SUCCEEDED, flow.getStatus());
  Assert.assertEquals(emptyExceptions, flow.getExceptions());

  // Return props of both children are combined.
  Props retProps = flow.getReturnProps();
  Assert.assertEquals(3, retProps.size());
  Assert.assertEquals("1", retProps.get("1"));
  Assert.assertEquals("2", retProps.get("2"));
  Assert.assertEquals("2", retProps.get("3"));

  EasyMock.verify(props);
  EasyMock.reset(props);

  // ---- Executing with props that do not match must be rejected ----
  EasyMock.expect(props.equalsProps(props)).andReturn(false).once();
  EasyMock.replay(props);

  boolean exceptionThrown = false;
  try {
    flow.execute(
        props,
        new FlowCallback() {
          @Override
          public void progressMade() {}

          @Override
          public void completed(Status status) {}
        });
  } catch (IllegalArgumentException e) {
    exceptionThrown = true;
  }

  Assert.assertTrue(
      "Expected an IllegalArgumentException to be thrown because props weren't the same.",
      exceptionThrown);
}
/**
 * Checks that a callback registered through a second {@code execute} call made while the group
 * is still RUNNING is also invoked.
 *
 * <p>While {@code mockFlow1} is being executed its answer re-enters {@code flow.execute} with an
 * extra callback; after the outer {@code execute} returns, the test asserts that this extra
 * callback was called, in addition to the usual status and exception checks.
 */
@Test
public void testAllCallbacksCalled() throws Exception {
  // Counts child executions; also used to assert the order in which the children run.
  final AtomicLong numJobsComplete = new AtomicLong(0);
  // Set by the callback that is registered from inside mockFlow1's execute answer.
  final AtomicBoolean executeCallWhileStateWasRunningHadItsCallbackCalled =
      new AtomicBoolean(false);

  // ---- Set up mockFlow1: re-enters flow.execute() before reporting completion ----
  final Capture<FlowCallback> flow1Callback = new Capture<FlowCallback>();
  mockFlow1.execute(EasyMock.eq(props), EasyMock.capture(flow1Callback));
  EasyMock.expectLastCall()
      .andAnswer(
          new IAnswer<Void>() {
            @Override
            public Void answer() throws Throwable {
              Assert.assertEquals(Status.RUNNING, flow.getStatus());
              Assert.assertEquals(1, numJobsComplete.incrementAndGet());

              flow.execute(
                  props,
                  new OneCallFlowCallback(executeCallWhileStateWasRunningHadItsCallbackCalled) {
                    @Override
                    protected void theCallback(Status status) {}
                  });

              flow1Callback.getValue().completed(Status.SUCCEEDED);

              return null;
            }
          })
      .once();

  EasyMock.expect(mockFlow1.getStatus()).andReturn(Status.SUCCEEDED).times(2);

  // ---- Set up mockFlow2: runs second and succeeds ----
  final Capture<FlowCallback> flow2Callback = new Capture<FlowCallback>();
  mockFlow2.execute(EasyMock.eq(props), EasyMock.capture(flow2Callback));
  EasyMock.expectLastCall()
      .andAnswer(
          new IAnswer<Void>() {
            @Override
            public Void answer() throws Throwable {
              Assert.assertEquals(Status.RUNNING, flow.getStatus());
              Assert.assertEquals(2, numJobsComplete.incrementAndGet());

              flow2Callback.getValue().completed(Status.SUCCEEDED);

              return null;
            }
          })
      .once();

  // mockFlow2 reports READY on the first status query and SUCCEEDED on the second.
  EasyMock.expect(mockFlow2.getStatus())
      .andAnswer(
          new IAnswer<Status>() {
            private volatile AtomicInteger count = new AtomicInteger(0);

            @Override
            public Status answer() throws Throwable {
              switch (count.getAndIncrement()) {
                case 0:
                  return Status.READY;
                case 1:
                  return Status.SUCCEEDED;
                default:
                  Assert.fail("mockFlow2.getStatus() should only be called 2 times.");
              }
              return null;
            }
          })
      .times(2);

  EasyMock.expect(mockFlow1.getReturnProps()).andReturn(new Props()).once();
  EasyMock.expect(mockFlow2.getReturnProps()).andReturn(new Props()).once();

  // Two props comparisons are expected: the outer execute and the re-entrant one.
  EasyMock.expect(props.equalsProps(props)).andReturn(true).times(2);

  EasyMock.replay(mockFlow1, mockFlow2, props);

  // ---- Start the test ----
  AtomicBoolean callbackRan = new AtomicBoolean(false);
  flow.execute(
      props,
      new OneCallFlowCallback(callbackRan) {
        @Override
        public void theCallback(Status status) {
          Assert.assertEquals(Status.SUCCEEDED, status);
          Assert.assertEquals(2, numJobsComplete.get());
        }
      });

  Assert.assertTrue("Callback wasn't run.", callbackRan.get());
  Assert.assertEquals(Status.SUCCEEDED, flow.getStatus());
  Assert.assertTrue(
      "mockFlow1, upon completion, sends another execute() call to the flow. "
          + "The callback from that execute call was apparently not called.",
      executeCallWhileStateWasRunningHadItsCallbackCalled.get());
  Assert.assertEquals(emptyExceptions, flow.getExceptions());

  // Executing the completed flow again must still invoke the callback; the counter stays at 2.
  callbackRan = new AtomicBoolean(false);
  flow.execute(
      props,
      new OneCallFlowCallback(callbackRan) {
        @Override
        protected void theCallback(Status status) {
          Assert.assertEquals(Status.SUCCEEDED, status);
          Assert.assertEquals(2, numJobsComplete.get());
        }
      });

  Assert.assertTrue("Callback wasn't run.", callbackRan.get());
  Assert.assertEquals(Status.SUCCEEDED, flow.getStatus());
  Assert.assertEquals(emptyExceptions, flow.getExceptions());
}
/**
 * Executes a two-child group where the first child succeeds and the second child fails.
 *
 * <p>The group is expected to end up FAILED with the exceptions map returned by
 * {@code mockFlow2}. A second {@code execute} on the failed flow reports FAILED again while the
 * completion counter stays at 2, and {@code reset()} returns the flow to READY with no
 * exceptions.
 */
@Test
public void testFailureJob2() throws Exception {
  // Counts child executions; also used to assert the order in which the children run.
  final AtomicLong numJobsComplete = new AtomicLong(0);

  // ---- Set up mockFlow1: runs first and succeeds ----
  final Capture<FlowCallback> flow1Callback = new Capture<FlowCallback>();
  mockFlow1.execute(EasyMock.eq(props), EasyMock.capture(flow1Callback));
  EasyMock.expectLastCall()
      .andAnswer(
          new IAnswer<Void>() {
            @Override
            public Void answer() throws Throwable {
              Assert.assertEquals(Status.RUNNING, flow.getStatus());
              Assert.assertEquals(1, numJobsComplete.incrementAndGet());

              flow1Callback.getValue().completed(Status.SUCCEEDED);

              return null;
            }
          })
      .once();

  EasyMock.expect(mockFlow1.getStatus()).andReturn(Status.SUCCEEDED).times(2);
  EasyMock.expect(mockFlow1.getExceptions()).andReturn(emptyExceptions).times(1);

  // ---- Set up mockFlow2: runs second and fails ----
  final Capture<FlowCallback> flow2Callback = new Capture<FlowCallback>();
  mockFlow2.execute(EasyMock.eq(props), EasyMock.capture(flow2Callback));
  EasyMock.expectLastCall()
      .andAnswer(
          new IAnswer<Void>() {
            @Override
            public Void answer() throws Throwable {
              Assert.assertEquals(Status.RUNNING, flow.getStatus());
              Assert.assertEquals(2, numJobsComplete.incrementAndGet());

              flow2Callback.getValue().completed(Status.FAILED);

              return null;
            }
          })
      .once();

  // mockFlow2 reports READY on the first status query and FAILED on the second.
  EasyMock.expect(mockFlow2.getStatus())
      .andAnswer(
          new IAnswer<Status>() {
            private volatile AtomicInteger count = new AtomicInteger(0);

            @Override
            public Status answer() throws Throwable {
              switch (count.getAndIncrement()) {
                case 0:
                  return Status.READY;
                case 1:
                  return Status.FAILED;
                default:
                  Assert.fail("mockFlow2.getStatus() should only be called 2 times.");
              }
              return null;
            }
          })
      .times(2);

  // The exceptions mockFlow2 reports; the group is expected to expose exactly these.
  final RuntimeException e1 = new RuntimeException();
  final RuntimeException e2 = new RuntimeException();

  final Map<String, Throwable> e1s = new HashMap<String, Throwable>();
  e1s.put("e1", e1);
  e1s.put("e2", e2);

  EasyMock.expect(mockFlow2.getExceptions()).andReturn(e1s).times(1);

  EasyMock.expect(props.equalsProps(props)).andReturn(true).once();

  EasyMock.replay(mockFlow1, mockFlow2, props);

  // ---- Start the test ----
  AtomicBoolean callbackRan = new AtomicBoolean(false);
  flow.execute(
      props,
      new OneCallFlowCallback(callbackRan) {
        @Override
        public void theCallback(Status status) {
          Assert.assertEquals(Status.FAILED, status);
        }
      });

  Assert.assertTrue("Callback wasn't run.", callbackRan.get());
  Assert.assertEquals(Status.FAILED, flow.getStatus());
  Assert.assertEquals(e1s, flow.getExceptions());

  // Executing the failed flow again must still invoke the callback; the counter stays at 2.
  callbackRan = new AtomicBoolean(false);
  flow.execute(
      props,
      new OneCallFlowCallback(callbackRan) {
        @Override
        protected void theCallback(Status status) {
          Assert.assertEquals(Status.FAILED, status);
          Assert.assertEquals(2, numJobsComplete.get());
        }
      });

  Assert.assertTrue("Callback wasn't run.", callbackRan.get());
  Assert.assertEquals(Status.FAILED, flow.getStatus());
  Assert.assertEquals(e1s, flow.getExceptions());

  // A failed flow can be reset back to READY, which also clears its exceptions.
  Assert.assertTrue("Expected to be able to reset the flow", flow.reset());
  Assert.assertEquals(Status.READY, flow.getStatus());
  Assert.assertEquals(emptyExceptions, flow.getExceptions());
}
/**
 * Executes a two-child group where the first child fails.
 *
 * <p>The group is expected to end up FAILED with the exceptions returned by {@code mockFlow1}.
 * A second {@code execute} on the failed flow reports FAILED again, and {@code reset()} returns
 * the flow to READY with no exceptions.
 *
 * <p>No {@code execute} expectation is recorded for {@code mockFlow2}. NOTE(review): whether an
 * unexpected call would fail the test depends on how the mock is created, which is outside this
 * method.
 */
@Test
public void testFailureJob1() throws Exception {
  // Counts child executions.
  final AtomicLong numJobsComplete = new AtomicLong(0);

  // ---- Set up mockFlow1: runs first and fails ----
  final Capture<FlowCallback> flow1Callback = new Capture<FlowCallback>();
  mockFlow1.execute(EasyMock.eq(props), EasyMock.capture(flow1Callback));
  EasyMock.expectLastCall()
      .andAnswer(
          new IAnswer<Void>() {
            @Override
            public Void answer() throws Throwable {
              Assert.assertEquals(Status.RUNNING, flow.getStatus());
              Assert.assertEquals(1, numJobsComplete.incrementAndGet());

              flow1Callback.getValue().completed(Status.FAILED);

              return null;
            }
          })
      .once();

  EasyMock.expect(mockFlow1.getStatus()).andReturn(Status.FAILED).times(1);
  EasyMock.expect(mockFlow1.getExceptions()).andReturn(theExceptions).times(1);

  EasyMock.expect(props.equalsProps(props)).andReturn(true).once();

  // ---- Set up mockFlow2: only its (empty) exceptions are queried ----
  EasyMock.expect(mockFlow2.getExceptions()).andReturn(emptyExceptions).times(1);

  EasyMock.replay(mockFlow1, mockFlow2, props);

  // ---- Start the test ----
  AtomicBoolean callbackRan = new AtomicBoolean(false);
  flow.execute(
      props,
      new OneCallFlowCallback(callbackRan) {
        @Override
        public void theCallback(Status status) {
          Assert.assertEquals(Status.FAILED, status);
        }
      });

  Assert.assertTrue("Callback wasn't run.", callbackRan.get());
  Assert.assertEquals(Status.FAILED, flow.getStatus());
  Assert.assertEquals(theExceptions, flow.getExceptions());

  // Executing the failed flow again must still invoke the callback with FAILED.
  callbackRan = new AtomicBoolean(false);
  flow.execute(
      props,
      new OneCallFlowCallback(callbackRan) {
        @Override
        protected void theCallback(Status status) {
          Assert.assertEquals(Status.FAILED, status);
        }
      });

  Assert.assertTrue("Callback wasn't run.", callbackRan.get());
  Assert.assertEquals(Status.FAILED, flow.getStatus());
  Assert.assertEquals(theExceptions, flow.getExceptions());

  // A failed flow can be reset back to READY, which also clears its exceptions.
  Assert.assertTrue("Expected to be able to reset the flow", flow.reset());
  Assert.assertEquals(Status.READY, flow.getStatus());
  Assert.assertEquals(emptyExceptions, flow.getExceptions());
}
// Verify if the new avro schema being pushed is the same one as the old one // Does not have logic to check for Avro schema evolution yet public void verifyAvroSchema(String url) throws Exception { // create new n store def with schema from the metadata in the input // path Schema schema = AvroUtils.getAvroSchemaFromPath(getInputPath()); int replicationFactor = props.getInt("build.replication.factor", 2); int requiredReads = props.getInt("build.required.reads", 1); int requiredWrites = props.getInt("build.required.writes", 1); String description = props.getString("push.store.description", ""); String owners = props.getString("push.store.owners", ""); String keySchema = "\n\t\t<type>avro-generic</type>\n\t\t<schema-info version=\"0\">" + schema.getField(keyField).schema() + "</schema-info>\n\t"; String valSchema = "\n\t\t<type>avro-generic</type>\n\t\t<schema-info version=\"0\">" + schema.getField(valueField).schema() + "</schema-info>\n\t"; boolean hasCompression = false; if (props.containsKey("build.compress.value")) hasCompression = true; if (hasCompression) { valSchema += "\t<compression><type>gzip</type></compression>\n\t"; } if (props.containsKey("build.force.schema.key")) { keySchema = props.get("build.force.schema.key"); } if (props.containsKey("build.force.schema.value")) { valSchema = props.get("build.force.schema.value"); } String newStoreDefXml = VoldemortUtils.getStoreDefXml( storeName, replicationFactor, requiredReads, requiredWrites, props.containsKey("build.preferred.reads") ? props.getInt("build.preferred.reads") : null, props.containsKey("build.preferred.writes") ? props.getInt("build.preferred.writes") : null, (props.containsKey("push.force.schema.key")) ? props.getString("push.force.schema.key") : keySchema, (props.containsKey("push.force.schema.value")) ? 
props.getString("push.force.schema.value") : valSchema, description, owners); log.info("Verifying store: \n" + newStoreDefXml.toString()); StoreDefinition newStoreDef = VoldemortUtils.getStoreDef(newStoreDefXml); // get store def from cluster log.info("Getting store definition from: " + url + " (node id " + this.nodeId + ")"); AdminClient adminClient = new AdminClient(url, new AdminClientConfig()); try { List<StoreDefinition> remoteStoreDefs = adminClient.getRemoteStoreDefList(this.nodeId).getValue(); boolean foundStore = false; // go over all store defs and see if one has the same name as the // store we're trying // to build for (StoreDefinition remoteStoreDef : remoteStoreDefs) { if (remoteStoreDef.getName().equals(storeName)) { // if the store already exists, but doesn't match what we // want to push, we need // to worry if (!remoteStoreDef.equals(newStoreDef)) { // let's check to see if the key/value serializers are // REALLY equal. SerializerDefinition localKeySerializerDef = newStoreDef.getKeySerializer(); SerializerDefinition localValueSerializerDef = newStoreDef.getValueSerializer(); SerializerDefinition remoteKeySerializerDef = remoteStoreDef.getKeySerializer(); SerializerDefinition remoteValueSerializerDef = remoteStoreDef.getValueSerializer(); if (remoteKeySerializerDef.getName().equals("avro-generic") && remoteValueSerializerDef.getName().equals("avro-generic") && remoteKeySerializerDef.getAllSchemaInfoVersions().size() == 1 && remoteValueSerializerDef.getAllSchemaInfoVersions().size() == 1) { Schema remoteKeyDef = Schema.parse(remoteKeySerializerDef.getCurrentSchemaInfo()); Schema remoteValDef = Schema.parse(remoteValueSerializerDef.getCurrentSchemaInfo()); Schema localKeyDef = Schema.parse(localKeySerializerDef.getCurrentSchemaInfo()); Schema localValDef = Schema.parse(localValueSerializerDef.getCurrentSchemaInfo()); if (remoteKeyDef.equals(localKeyDef) && remoteValDef.equals(localValDef)) { String compressionPolicy = ""; if (hasCompression) { 
compressionPolicy = "\n\t\t<compression><type>gzip</type></compression>"; } // if the key/value serializers are REALLY equal // (even though the strings may not match), then // just use the remote stores to GUARANTEE that // they // match, and try again. newStoreDefXml = VoldemortUtils.getStoreDefXml( storeName, replicationFactor, requiredReads, requiredWrites, props.containsKey("build.preferred.reads") ? props.getInt("build.preferred.reads") : null, props.containsKey("build.preferred.writes") ? props.getInt("build.preferred.writes") : null, "\n\t\t<type>avro-generic</type>\n\t\t<schema-info version=\"0\">" + remoteKeySerializerDef.getCurrentSchemaInfo() + "</schema-info>\n\t", "\n\t\t<type>avro-generic</type>\n\t\t<schema-info version=\"0\">" + remoteValueSerializerDef.getCurrentSchemaInfo() + "</schema-info>" + compressionPolicy + "\n\t"); newStoreDef = VoldemortUtils.getStoreDef(newStoreDefXml); if (!remoteStoreDef.equals(newStoreDef)) { // if we still get a fail, then we know that // the // store defs don't match for reasons OTHER // than // the key/value serializer throw new RuntimeException( "Your store schema is identical, but the store definition does not match. Have: " + newStoreDef + "\nBut expected: " + remoteStoreDef); } } else { // if the key/value serializers are not equal // (even // in java, not just json strings), then fail throw new RuntimeException( "Your store definition does not match the store definition that is already in the cluster. Tried to resolve identical schemas between local and remote, but failed. Have: " + newStoreDef + "\nBut expected: " + remoteStoreDef); } } } foundStore = true; break; } } // if the store doesn't exist yet, create it if (!foundStore) { // New requirement - Make sure the user had description and // owner specified if (description.length() == 0) { throw new RuntimeException( "Description field missing in store definition. 
" + "Please add \"push.store.description\" with a line describing your store"); } if (owners.length() == 0) { throw new RuntimeException( "Owner field missing in store definition. " + "Please add \"push.store.owners\" with value being comma-separated list of LinkedIn email ids"); } log.info("Could not find store " + storeName + " on Voldemort. Adding it to all nodes "); adminClient.addStore(newStoreDef); } storeDefs = ImmutableList.of( VoldemortUtils.getStoreDef( VoldemortUtils.getStoreDefXml( storeName, replicationFactor, requiredReads, requiredWrites, props.containsKey("build.preferred.reads") ? props.getInt("build.preferred.reads") : null, props.containsKey("build.preferred.writes") ? props.getInt("build.preferred.writes") : null, keySchema, valSchema))); cluster = adminClient.getAdminClientCluster(); } finally { adminClient.stop(); } }
/**
 * Returns the configured {@code build.input.path} after it has been passed through
 * {@code HadoopUtils.getSanitizedPath}. At the moment of writing, this means the #LATEST tag is
 * expanded.
 *
 * @throws IOException if sanitizing the path fails
 */
private Path getInputPath() throws IOException {
  final String configuredInput = props.getString("build.input.path");
  return HadoopUtils.getSanitizedPath(new Path(configuredInput));
}
/**
 * Runs a {@code VoldemortStoreBuilderJob} for the cluster at {@code url} and returns the
 * directory the store was built into.
 *
 * <p>The output directory is {@code build.output.dir} with the host part of {@code url} as a
 * child. For Avro jobs the builder additionally receives the key/value field names and the
 * record, key and value schemas.
 *
 * @param props the job properties
 * @param url the cluster url; only its host is used here
 * @return the output directory as a string
 * @throws Exception if the url cannot be parsed or the build job fails
 */
public String runBuildStore(Props props, String url) throws Exception {
  final int replicationFactor = props.getInt("build.replication.factor", 2);
  final int chunkSize = props.getInt("build.chunk.size", 1024 * 1024 * 1024);

  // The random default is computed even when build.temp.dir is configured.
  final String defaultTempDir = "/tmp/vold-build-and-push-" + new Random().nextLong();
  final Path tempDir = new Path(props.getString("build.temp.dir", defaultTempDir));

  final String clusterHost = new URI(url).getHost();
  final Path outputDir = new Path(props.getString("build.output.dir"), clusterHost);
  final Path inputPath = getInputPath();

  final String keySelection = props.getString("build.key.selection", null);
  final String valSelection = props.getString("build.value.selection", null);

  final String checkSumName =
      props.getString("checksum.type", CheckSum.toString(CheckSumType.MD5));
  final CheckSumType checkSumType = CheckSum.fromString(checkSumName);

  final boolean saveKeys = props.getBoolean("save.keys", true);
  final boolean reducerPerBucket = props.getBoolean("reducer.per.bucket", false);
  final int numChunks = props.getInt("num.chunks", -1);

  final VoldemortStoreBuilderJob builderJob;
  if (isAvroJob) {
    final String recSchema = getRecordSchema();
    final String keySchema = getKeySchema();
    final String valSchema = getValueSchema();

    builderJob =
        new VoldemortStoreBuilderJob(
            this.getId() + "-build-store",
            props,
            new VoldemortStoreBuilderConf(
                replicationFactor,
                chunkSize,
                tempDir,
                outputDir,
                inputPath,
                cluster,
                storeDefs,
                storeName,
                keySelection,
                valSelection,
                null,
                null,
                checkSumType,
                saveKeys,
                reducerPerBucket,
                numChunks,
                keyField,
                valueField,
                recSchema,
                keySchema,
                valSchema),
            true);
  } else {
    builderJob =
        new VoldemortStoreBuilderJob(
            this.getId() + "-build-store",
            props,
            new VoldemortStoreBuilderConf(
                replicationFactor,
                chunkSize,
                tempDir,
                outputDir,
                inputPath,
                cluster,
                storeDefs,
                storeName,
                keySelection,
                valSelection,
                null,
                null,
                checkSumType,
                saveKeys,
                reducerPerBucket,
                numChunks));
  }

  builderJob.run();
  return outputDir.toString();
}
@Override public void run() throws Exception { boolean build = props.getBoolean("build", true); boolean push = props.getBoolean("push", true); if (build && push && dataDirs.size() != 1) { // Should have only one data directory ( which acts like the parent // directory to all // urls ) throw new RuntimeException( " Should have only one data directory ( which acts like root directory ) since they are auto-generated during build phase "); } else if (!build && push && dataDirs.size() != clusterUrl.size()) { // Number of data directories should be equal to number of cluster // urls throw new RuntimeException( " Since we are only pushing, number of data directories ( comma separated ) should be equal to number of cluster urls "); } // Check every url individually HashMap<String, Exception> exceptions = Maps.newHashMap(); for (int index = 0; index < clusterUrl.size(); index++) { String url = clusterUrl.get(index); log.info("Working on " + url); try { if (isAvroJob) verifyAvroSchema(url); else verifySchema(url); String buildOutputDir; if (build) { buildOutputDir = runBuildStore(props, url); } else { buildOutputDir = dataDirs.get(index); } if (push) { if (log.isDebugEnabled()) log.debug("Informing about push start ..."); informedResults.add( this.informedExecutor.submit( new InformedClient(this.props, "Running", this.getId()))); runPushStore(props, url, buildOutputDir); } if (build && push && !props.getBoolean("build.output.keep", false)) { JobConf jobConf = new JobConf(); if (props.containsKey("hadoop.job.ugi")) { jobConf.set("hadoop.job.ugi", props.getString("hadoop.job.ugi")); } log.info("Deleting " + buildOutputDir); HadoopUtils.deletePathIfExists(jobConf, buildOutputDir); log.info("Deleted " + buildOutputDir); } if (log.isDebugEnabled()) log.debug("Informing about push finish ..."); informedResults.add( this.informedExecutor.submit(new InformedClient(this.props, "Finished", this.getId()))); for (Future result : informedResults) { try { result.get(); } catch (Exception 
e) { this.log.error("Exception in consumer", e); } } this.informedExecutor.shutdownNow(); } catch (Exception e) { log.error("Exception during build and push for url " + url, e); exceptions.put(url, e); } } if (exceptions.size() > 0) { log.error( "Got exceptions while pushing to " + Joiner.on(",").join(exceptions.keySet()) + " => " + Joiner.on(",").join(exceptions.values())); System.exit(-1); } }