@Test public void reserveTest() throws Exception { // Reserve on top tier long blockId = 100; BlockStoreLocation tier0 = BlockStoreLocation.anyDirInTier(StorageLevelAlias.MEM.getValue()); for (int i = 0; i < 3; i++) { TieredBlockStoreTestUtils.cache(SESSION_ID, blockId++, BLOCK_SIZE, mBlockStore, tier0); } CommonUtils.sleepMs( WorkerContext.getConf().getLong(Constants.WORKER_SPACE_RESERVER_INTERVAL_MS)); BlockStoreMeta storeMeta = mBlockStore.getBlockStoreMeta(); Assert.assertEquals(3 * BLOCK_SIZE, storeMeta.getUsedBytes()); List<Long> usedBytesOnTiers = storeMeta.getUsedBytesOnTiers(); Assert.assertEquals( 2 * BLOCK_SIZE, (long) usedBytesOnTiers.get(StorageLevelAlias.MEM.getValue() - 1)); Assert.assertEquals( BLOCK_SIZE, (long) usedBytesOnTiers.get(StorageLevelAlias.HDD.getValue() - 1)); // Reserve on under tier for (int i = 0; i < 7; i++) { TieredBlockStoreTestUtils.cache(SESSION_ID, blockId++, BLOCK_SIZE, mBlockStore, tier0); } CommonUtils.sleepMs( WorkerContext.getConf().getLong(Constants.WORKER_SPACE_RESERVER_INTERVAL_MS)); storeMeta = mBlockStore.getBlockStoreMeta(); Assert.assertEquals(9 * BLOCK_SIZE, storeMeta.getUsedBytes()); usedBytesOnTiers = storeMeta.getUsedBytesOnTiers(); Assert.assertEquals( 2 * BLOCK_SIZE, (long) usedBytesOnTiers.get(StorageLevelAlias.MEM.getValue() - 1)); Assert.assertEquals( 7 * BLOCK_SIZE, (long) usedBytesOnTiers.get(StorageLevelAlias.HDD.getValue() - 1)); }
/**
 * Heartbeat loop of the worker. Until {@code mStop} is set, each iteration:
 * <ol>
 * <li>sleeps for whatever remains of {@code TO_MASTER_HEARTBEAT_INTERVAL_MS} since the last
 * successful heartbeat (or logs an error if the interval has already been exceeded);</li>
 * <li>sends a heartbeat through {@code mWorkerStorage} and executes the returned command;</li>
 * <li>calls {@code mWorkerStorage.checkStatus()}.</li>
 * </ol>
 *
 * @throws RuntimeException if no heartbeat has succeeded for at least
 *         {@code HEARTBEAT_TIMEOUT_MS}, or if the master returns an unrecognized command type
 */
@Override
public void run() {
  long lastHeartbeatMs = System.currentTimeMillis();
  Command cmd = null;
  while (!mStop) {
    long diff = System.currentTimeMillis() - lastHeartbeatMs;
    if (diff < WorkerConf.get().TO_MASTER_HEARTBEAT_INTERVAL_MS) {
      LOG.debug("Heartbeat process takes {} ms.", diff);
      CommonUtils.sleepMs(LOG, WorkerConf.get().TO_MASTER_HEARTBEAT_INTERVAL_MS - diff);
    } else {
      LOG.error("Heartbeat process takes " + diff + " ms.");
    }

    try {
      cmd = mWorkerStorage.heartbeat();
      lastHeartbeatMs = System.currentTimeMillis();
    } catch (IOException e) {
      LOG.error(e.getMessage(), e);
      mWorkerStorage.resetMasterClient();
      CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
      cmd = null;
      // Measure once so the value that is checked is also the value that is reported.
      long sinceLastHeartbeatMs = System.currentTimeMillis() - lastHeartbeatMs;
      if (sinceLastHeartbeatMs >= WorkerConf.get().HEARTBEAT_TIMEOUT_MS) {
        throw new RuntimeException("Heartbeat timeout " + sinceLastHeartbeatMs + "ms", e);
      }
    }

    if (cmd != null) {
      switch (cmd.mCommandType) {
        case Unknown:
          LOG.error("Unknown command: " + cmd);
          break;
        case Nothing:
          LOG.debug("Nothing command: {}", cmd);
          break;
        case Register:
          LOG.info("Register command: " + cmd);
          mWorkerStorage.register();
          break;
        case Free:
          mWorkerStorage.freeBlocks(cmd.mData);
          LOG.info("Free command: " + cmd);
          break;
        case Delete:
          // Only logged; no action is taken for this command here.
          LOG.info("Delete command: " + cmd);
          break;
        default:
          throw new RuntimeException("Un-recognized command from master " + cmd.toString());
      }
    }

    mWorkerStorage.checkStatus();
  }
}
/**
 * Starts and stops a fresh {@code LocalTachyonCluster} twice in a row, pausing one second after
 * every start and every stop.
 *
 * @param args unused
 * @throws Exception if starting or stopping a cluster fails
 */
public static void main(String[] args) throws Exception {
  final int cycles = 2;
  for (int cycle = 0; cycle < cycles; cycle++) {
    LocalTachyonCluster localCluster =
        new LocalTachyonCluster(100, 8 * Constants.MB, Constants.GB);
    localCluster.start();
    CommonUtils.sleepMs(Constants.SECOND_MS);
    localCluster.stop();
    CommonUtils.sleepMs(Constants.SECOND_MS);
  }
}
@Test public void singleMasterJournalCrashIntegrationTest() throws Exception { LocalTachyonCluster cluster = setupSingleMasterCluster(); CommonUtils.sleepMs(TEST_TIME_MS); // Shutdown the cluster cluster.stopTFS(); CommonUtils.sleepMs(TEST_TIME_MS); // Ensure the client threads are stopped. mExecutorsForClient.shutdown(); mExecutorsForClient.awaitTermination(TEST_TIME_MS, TimeUnit.MILLISECONDS); reproduceAndCheckState(mCreateFileThread.getSuccessNum()); // clean up cluster.stopUFS(); }
@Test public void createPathTest() throws Exception { // save the last mod time of the root long lastModTime = mTree.getRoot().getLastModificationTimeMs(); // sleep to ensure a different last modification time CommonUtils.sleepMs(10); // create nested directory InodeTree.CreatePathResult createResult = mTree.createPath(NESTED_URI, sNestedDirectoryOptions); List<Inode> modified = createResult.getModified(); List<Inode> created = createResult.getCreated(); // 1 modified directory Assert.assertEquals(1, modified.size()); Assert.assertEquals("", modified.get(0).getName()); Assert.assertNotEquals(lastModTime, modified.get(0).getLastModificationTimeMs()); // 2 created directories Assert.assertEquals(2, created.size()); Assert.assertEquals("nested", created.get(0).getName()); Assert.assertEquals("test", created.get(1).getName()); // save the last mod time of 'test' lastModTime = created.get(1).getLastModificationTimeMs(); // sleep to ensure a different last modification time CommonUtils.sleepMs(10); // creating the directory path again results in no new inodes. try { createResult = mTree.createPath(NESTED_URI, sNestedDirectoryOptions); Assert.assertTrue("createPath should throw FileAlreadyExistsException", false); } catch (FileAlreadyExistsException faee) { Assert.assertEquals( faee.getMessage(), ExceptionMessage.FILE_ALREADY_EXISTS.getMessage(NESTED_URI)); } // create a file CreatePathOptions options = new CreatePathOptions.Builder(MasterContext.getConf()) .setBlockSizeBytes(Constants.KB) .setRecursive(true) .build(); createResult = mTree.createPath(NESTED_FILE_URI, options); modified = createResult.getModified(); created = createResult.getCreated(); // test directory was modified Assert.assertEquals(1, modified.size()); Assert.assertEquals("test", modified.get(0).getName()); Assert.assertNotEquals(lastModTime, modified.get(0).getLastModificationTimeMs()); // file was created Assert.assertEquals(1, created.size()); Assert.assertEquals("file", created.get(0).getName()); }
// Tests that deletes go through despite failing initially due to concurrent read @Test public void deleteWhileReadTest() throws Exception { TachyonFile file = TachyonFSTestUtils.createByteFile( mTFS, "/test1", TachyonStorageType.STORE, UnderStorageType.NO_PERSIST, MEM_CAPACITY_BYTES); CommonUtils.sleepMs(LOG, mWorkerToMasterHeartbeatIntervalMs * 3); Assert.assertTrue(mTFS.getInfo(file).getInMemoryPercentage() == 100); // Open the file InStreamOptions options = new InStreamOptions.Builder(new TachyonConf()) .setTachyonStorageType(TachyonStorageType.STORE) .build(); FileInStream in = mTFS.getInStream(file, options); Assert.assertEquals(0, in.read()); // Delete the file mTFS.delete(file); CommonUtils.sleepMs(LOG, mWorkerToMasterHeartbeatIntervalMs * 3); // After the delete, the master should no longer serve the file Assert.assertNull(mTFS.open(new TachyonURI("/test1"))); // However, the previous read should still be able to read it as the data still exists byte[] res = new byte[MEM_CAPACITY_BYTES]; Assert.assertEquals(MEM_CAPACITY_BYTES - 1, in.read(res, 1, MEM_CAPACITY_BYTES - 1)); res[0] = 0; Assert.assertTrue(BufferUtils.equalIncreasingByteArray(MEM_CAPACITY_BYTES, res)); in.close(); CommonUtils.sleepMs(LOG, mWorkerToMasterHeartbeatIntervalMs * 3); // After the file is closed, the master's delete should go through and new files can be created TachyonFile newFile = TachyonFSTestUtils.createByteFile( mTFS, "/test2", TachyonStorageType.STORE, UnderStorageType.NO_PERSIST, MEM_CAPACITY_BYTES); CommonUtils.sleepMs(LOG, mWorkerToMasterHeartbeatIntervalMs * 3); Assert.assertTrue(mTFS.getInfo(newFile).getInMemoryPercentage() == 100); }
/** * Keep creating files until something crashes or fail to create. Record how many files are * created successfully. */ @Override public void run() { try { // This infinity loop will be broken if something crashes or fail to create. This is // expected since the master will shutdown at a certain time. while (true) { if (mOpType == 0) { try { mFileSystem.createFile(new TachyonURI(TEST_FILE_DIR + mSuccessNum)).close(); } catch (IOException e) { break; } } else if (mOpType == 1) { // TODO(gene): Add this back when there is new RawTable client API. // if (mFileSystem.createRawTable(new TachyonURI(TEST_TABLE_DIR + mSuccessNum), 1) == // -1) { // break; // } } // The create operation may succeed at the master side but still returns false due to the // shutdown. So the mSuccessNum may be less than the actual success number. mSuccessNum++; CommonUtils.sleepMs(100); } } catch (Exception e) { // Something crashed. Stop the thread. } }
/** * Stops the block worker. This method should only be called to terminate the worker. * * @throws IOException if the data server fails to close */ public void stop() throws IOException { mDataServer.close(); mThriftServer.stop(); mThriftServerSocket.close(); mSessionCleanerThread.stop(); mBlockMasterClient.close(); if (mSpaceReserver != null) { mSpaceReserver.stop(); } mFileSystemMasterClient.close(); // Use shutdownNow because HeartbeatThreads never finish until they are interrupted getExecutorService().shutdownNow(); mWorkerMetricsSystem.stop(); try { mWebServer.shutdownWebServer(); } catch (Exception e) { LOG.error("Failed to stop web server", e); } mBlockDataManager.stop(); while (!mDataServer.isClosed() || mThriftServer.isServing()) { // The reason to stop and close again is due to some issues in Thrift. mDataServer.close(); mThriftServer.stop(); mThriftServerSocket.close(); CommonUtils.sleepMs(100); } }
// TODO: Rethink the approach of this test and what it should be testing // @Test public void deleteDuringEvictionTest() throws IOException { // This test may not trigger eviction each time, repeat it 20 times. for (int i = 0; i < 20; i++) { deleteDuringEviction(i); CommonUtils.sleepMs(2 * HEARTBEAT_INTERVAL_MS); // ensure second delete completes } }
/**
 * Creates a 10-byte MUST_CACHE file, frees it through the shell, waits a little over two
 * worker-to-master heartbeat intervals, and checks that the file is no longer in memory.
 */
@Test
public void freeTest() throws IOException {
  TachyonFSTestUtils.createByteFile(mTfs, "/testFile", WriteType.MUST_CACHE, 10);

  final String[] freeCommand = {"free", "/testFile"};
  mFsShell.free(freeCommand);

  TachyonConf masterConf = mLocalTachyonCluster.getMasterTachyonConf();
  long waitMs = CommonUtils.getToMasterHeartBeatIntervalMs(masterConf) * 2 + 10;
  CommonUtils.sleepMs(null, waitMs);

  boolean stillInMemory = mTfs.getFile(new TachyonURI("/testFile")).isInMemory();
  Assert.assertFalse(stillInMemory);
}
@Ignore @Test public void multiMasterJournalCrashIntegrationTest() throws Exception { LocalTachyonClusterMultiMaster cluster = setupMultiMasterCluster(); // Kill the leader one by one. for (int kills = 0; kills < TEST_NUM_MASTERS; kills++) { CommonUtils.sleepMs(TEST_TIME_MS); Assert.assertTrue(cluster.killLeader()); } cluster.stopTFS(); CommonUtils.sleepMs(TEST_TIME_MS); // Ensure the client threads are stopped. mExecutorsForClient.shutdown(); while (!mExecutorsForClient.awaitTermination(TEST_TIME_MS, TimeUnit.MILLISECONDS)) {} reproduceAndCheckState(mCreateFileThread.getSuccessNum()); // clean up cluster.stopUFS(); }
/**
 * Stop the current Tachyon cluster by running {@code bin/tachyon-stop.sh} under the configured
 * Tachyon home, waiting for the script to exit and then sleeping one second. This is used for
 * preparation and clean up. To crash the Master, use <code>killMaster</code>.
 *
 * <p>Failures are logged and not rethrown. If the calling thread is interrupted while waiting,
 * its interrupt status is restored.
 */
private static void stopCluster() {
  String stopClusterCommand =
      new TachyonConf().get(Constants.TACHYON_HOME) + "/bin/tachyon-stop.sh";
  try {
    // Pass the command as a one-element array: exec(String) splits on whitespace, which breaks
    // the script path when the Tachyon home directory contains spaces.
    Runtime.getRuntime().exec(new String[] {stopClusterCommand}).waitFor();
    CommonUtils.sleepMs(LOG, 1000);
  } catch (InterruptedException e) {
    // Preserve the interrupt for callers instead of silently swallowing it.
    Thread.currentThread().interrupt();
    LOG.error("Error when stop Tachyon cluster", e);
  } catch (Exception e) {
    LOG.error("Error when stop Tachyon cluster", e);
  }
}
/**
 * Creates a 10-byte STORE / NO_PERSIST file, frees it through the shell, waits one
 * worker-to-master heartbeat interval, and checks that the file is no longer 100% in memory.
 */
@Test
public void freeTest() throws IOException, TException {
  final TachyonFile testFile =
      TachyonFSTestUtils.createByteFile(
          mTfs, "/testFile", TachyonStorageType.STORE, UnderStorageType.NO_PERSIST, 10);

  final String[] freeCommand = {"free", "/testFile"};
  mFsShell.run(freeCommand);

  TachyonConf masterConf = mLocalTachyonCluster.getMasterTachyonConf();
  int heartbeatIntervalMs = masterConf.getInt(Constants.WORKER_TO_MASTER_HEARTBEAT_INTERVAL_MS);
  CommonUtils.sleepMs(heartbeatIntervalMs);

  boolean fullyInMemory = mTfs.getInfo(testFile).getInMemoryPercentage() == 100;
  Assert.assertFalse(fullyInMemory);
}
// Tests that pinning a file and then unpinning @Test public void unpinFileTest() throws Exception { // Create a file that fills the entire Tachyon store TachyonFile file1 = TachyonFSTestUtils.createByteFile( mTFS, "/test1", TachyonStorageType.STORE, UnderStorageType.NO_PERSIST, MEM_CAPACITY_BYTES); // Pin the file mTFS.setState(file1, mSetPinned); CommonUtils.sleepMs(LOG, mWorkerToMasterHeartbeatIntervalMs * 3); // Confirm the pin with master Assert.assertTrue(mTFS.getInfo(file1).isIsPinned()); // Unpin the file mTFS.setState(file1, mSetUnpinned); CommonUtils.sleepMs(LOG, mWorkerToMasterHeartbeatIntervalMs * 3); // Confirm the unpin Assert.assertFalse(mTFS.getInfo(file1).isIsPinned()); // Try to create a file that cannot be stored unless the previous file is evicted, this // should succeed TachyonFile file2 = TachyonFSTestUtils.createByteFile( mTFS, "/test2", TachyonStorageType.STORE, UnderStorageType.NO_PERSIST, MEM_CAPACITY_BYTES); // File 2 should be in memory and File 1 should be evicted CommonUtils.sleepMs(LOG, mWorkerToMasterHeartbeatIntervalMs * 3); Assert.assertFalse(mTFS.getInfo(file1).getInMemoryPercentage() == 100); Assert.assertTrue(mTFS.getInfo(file2).getInMemoryPercentage() == 100); }
/**
 * Register this TachyonWorker to the TachyonMaster. The registration call is retried, with a
 * one-second pause after each failure, until the master returns a non-zero worker id, which is
 * then stored in {@code mWorkerId}.
 */
public void register() {
  long assignedId;
  do {
    try {
      assignedId =
          mMasterClient.worker_register(
              mWorkerAddress,
              mSpaceCounter.getCapacityBytes(),
              mSpaceCounter.getUsedBytes(),
              new ArrayList<Long>(mMemoryData));
    } catch (BlockInfoException e) {
      LOG.error(e.getMessage(), e);
      assignedId = 0;
      CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
    } catch (IOException e) {
      LOG.error(e.getMessage(), e);
      assignedId = 0;
      CommonUtils.sleepMs(LOG, Constants.SECOND_MS);
    }
  } while (assignedId == 0);
  mWorkerId = assignedId;
}
/** * Keep requesting to Master until something crashes or fail to create. Record how many * operations are performed successfully. */ @Override public void run() { // This infinity loop will be broken when the master is crashed and the client needs to stop. while (true) { synchronized (this) { if (mIsStopped) { break; } } try { TachyonURI testURI = new TachyonURI(mWorkDir + mSuccessNum); if (ClientOpType.CREATE_FILE == mOpType) { sTfs.getOutStream(testURI, sOutStreamOptions).close(); } else if (ClientOpType.CREATE_DELETE_FILE == mOpType) { try { sTfs.getOutStream(testURI, sOutStreamOptions).close(); } catch (TachyonException e) { // If file already exists, ignore it. if (e.getType() != TachyonExceptionType.FILE_ALREADY_EXISTS) { throw e; } } catch (Exception e) { throw e; } sTfs.delete(sTfs.open(testURI)); } else if (ClientOpType.CREATE_RENAME_FILE == mOpType) { try { sTfs.getOutStream(testURI, sOutStreamOptions).close(); } catch (TachyonException e) { // If file already exists, ignore it. if (e.getType() != TachyonExceptionType.FILE_ALREADY_EXISTS) { throw e; } } catch (Exception e) { throw e; } sTfs.rename(sTfs.open(testURI), new TachyonURI(testURI + "-rename")); } else if (ClientOpType.CREATE_TABLE == mOpType) { if (sOldTfs.createRawTable(new TachyonURI(mWorkDir + mSuccessNum), 1) == -1) { break; } } } catch (Exception e) { // Since master may crash/restart for several times, so this exception is expected. // Ignore the exception and still keep requesting to master. continue; } mSuccessNum++; CommonUtils.sleepMs(100); } }
/** * Stop this TachyonWorker. Stop all the threads belong to this TachyonWorker. * * @throws IOException * @throws InterruptedException */ public void stop() throws IOException, InterruptedException { mStop = true; mWorkerStorage.stop(); mDataServer.close(); mServer.stop(); mServerTNonblockingServerSocket.close(); mExecutorService.shutdown(); while (!mDataServer.isClosed() || mServer.isServing() || mHeartbeatThread.isAlive()) { // TODO The reason to stop and close again is due to some issues in Thrift. mServer.stop(); mServerTNonblockingServerSocket.close(); CommonUtils.sleepMs(null, 100); } mHeartbeatThread.join(); }
@Override public void run() { long lastCheckMs = System.currentTimeMillis(); while (mRunning) { // Check the time since last check, and wait until it is within check interval long lastIntervalMs = System.currentTimeMillis() - lastCheckMs; long toSleepMs = mCheckIntervalMs - lastIntervalMs; if (toSleepMs > 0) { CommonUtils.sleepMs(LOG, toSleepMs); } else { LOG.warn("Space reserver took: {}, expected: {}", lastIntervalMs, mCheckIntervalMs); } reserveSpace(); } }
/**
 * Kill Tachyon Master by 'kill -9' command. A bash one-liner selects the pids of java processes
 * whose command line contains {@code tachyon.master.TachyonMaster}; the method waits for that
 * shell to exit and then sleeps one second.
 *
 * <p>Failures are logged and not rethrown. If the calling thread is interrupted while waiting,
 * its interrupt status is restored.
 */
private static void killMaster() {
  String[] killMasterCommand =
      new String[] {
        "/usr/bin/env",
        "bash",
        "-c",
        "for pid in `ps -Aww -o pid,command | grep -i \"[j]ava\" | grep "
            + "\"tachyon.master.TachyonMaster\" | awk '{print $1}'`; do kill -9 \"$pid\"; done"
      };
  try {
    Runtime.getRuntime().exec(killMasterCommand).waitFor();
    CommonUtils.sleepMs(LOG, 1000);
  } catch (InterruptedException e) {
    // Preserve the interrupt for callers instead of silently swallowing it.
    Thread.currentThread().interrupt();
    LOG.error("Error when killing Master", e);
  } catch (Exception e) {
    LOG.error("Error when killing Master", e);
  }
}
/**
 * Writes two files, each slightly larger than half the worker capacity, reading the first block
 * of each back from the data server right after it is written. After a little over two
 * heartbeat intervals, the first file is expected to report 0% in memory.
 */
@Test
public void readMultiFiles() throws IOException, TException {
  final int length = WORKER_CAPACITY_BYTES / 2 + 1;

  for (String path : new String[] {"/readFile1", "/readFile2"}) {
    TachyonFile file =
        TachyonFSTestUtils.createByteFile(
            mTFS, path, TachyonStorageType.STORE, UnderStorageType.NO_PERSIST, length);
    BlockInfo firstBlock = getFirstBlockInfo(file);
    DataServerMessage response = request(firstBlock);
    assertValid(response, length, firstBlock.getBlockId(), 0, length);
  }

  int heartbeatIntervalMs =
      mWorkerTachyonConf.getInt(Constants.WORKER_TO_MASTER_HEARTBEAT_INTERVAL_MS);
  CommonUtils.sleepMs(heartbeatIntervalMs * 2 + 10);

  FileInfo firstFileInfo = mTFS.getInfo(mTFS.open(new TachyonURI("/readFile1")));
  Assert.assertEquals(0, firstFileInfo.inMemoryPercentage);
}
// Tests that pinning a file prevents it from being evicted. @Test public void pinFileTest() throws Exception { // Create a file that fills the entire Tachyon store TachyonFile file = TachyonFSTestUtils.createByteFile( mTFS, "/test1", TachyonStorageType.STORE, UnderStorageType.NO_PERSIST, MEM_CAPACITY_BYTES); // Pin the file mTFS.setState(file, mSetPinned); CommonUtils.sleepMs(LOG, mWorkerToMasterHeartbeatIntervalMs * 3); // Confirm the pin with master Assert.assertTrue(mTFS.getInfo(file).isIsPinned()); // Try to create a file that cannot be stored unless the previous file is evicted, expect an // exception since worker cannot serve the request mThrown.expect(IOException.class); TachyonFSTestUtils.createByteFile( mTFS, "/test2", TachyonStorageType.STORE, UnderStorageType.NO_PERSIST, MEM_CAPACITY_BYTES); }
/**
 * Sleep for the given number of milliseconds, reporting interruptions using the given logger.
 *
 * <p>Convenience overload: delegates to the three-argument {@code sleepMs}, passing
 * {@code false} as the third argument. NOTE(review): the meaning of that flag is defined by the
 * three-argument overload, which is not shown here - confirm there.
 *
 * @param logger logger for reporting interruptions
 * @param timeMs sleep duration in milliseconds
 */
public static void sleepMs(Logger logger, long timeMs) {
  sleepMs(logger, timeMs, false);
}
/**
 * Sleep for the given number of milliseconds.
 *
 * <p>Convenience overload: delegates to the two-argument {@code sleepMs} with {@code null}
 * passed as the logger.
 *
 * @param timeMs sleep duration in milliseconds
 */
public static void sleepMs(long timeMs) {
  sleepMs(null, timeMs);
}
@Override public void run() { while (true) { try { int fileId = -1; synchronized (mDependencyLock) { fileId = getFileIdBasedOnPriorityDependency(); if (fileId == -1) { if (mPriorityDependencies.size() == 0) { mPriorityDependencies = getSortedPriorityDependencyList(); if (!mPriorityDependencies.isEmpty()) { LOG.info( "Get new mPriorityDependencies " + CommonUtils.listToString(mPriorityDependencies)); } } else { List<Integer> tList = getSortedPriorityDependencyList(); boolean equal = true; if (mPriorityDependencies.size() != tList.size()) { equal = false; } if (equal) { for (int k = 0; k < tList.size(); k++) { if (tList.get(k) != mPriorityDependencies.get(k)) { equal = false; break; } } } if (!equal) { mPriorityDependencies = tList; } } fileId = getFileIdBasedOnPriorityDependency(); } if (fileId == -1) { fileId = getRandomUncheckpointedFile(); } } if (fileId == -1) { LOG.debug("Thread " + ID + " has nothing to checkpoint. Sleep for 1 sec."); CommonUtils.sleepMs(LOG, Constants.SECOND_MS); continue; } // TODO checkpoint process. 
In future, move from midPath to dstPath should be done by // master String midPath = CommonUtils.concat(mUnderfsWorkerDataFolder, fileId); String dstPath = CommonUtils.concat(CommonConf.get().UNDERFS_DATA_FOLDER, fileId); LOG.info( "Thread " + ID + " is checkpointing file " + fileId + " from " + mLocalDataFolder.toString() + " to " + midPath + " to " + dstPath); if (mCheckpointUnderFs == null) { mCheckpointUnderFs = UnderFileSystem.get(midPath); } long startCopyTimeMs = System.currentTimeMillis(); ClientFileInfo fileInfo = mMasterClient.getClientFileInfoById(fileId); if (!fileInfo.isComplete) { LOG.error("File " + fileInfo + " is not complete!"); continue; } for (int k = 0; k < fileInfo.blockIds.size(); k++) { lockBlock(fileInfo.blockIds.get(k), Users.sCHECKPOINT_USER_ID); } OutputStream os = mCheckpointUnderFs.create(midPath, (int) fileInfo.getBlockSizeByte()); long fileSizeByte = 0; for (int k = 0; k < fileInfo.blockIds.size(); k++) { File tempFile = new File(CommonUtils.concat(mLocalDataFolder.toString(), fileInfo.blockIds.get(k))); fileSizeByte += tempFile.length(); InputStream is = new FileInputStream(tempFile); byte[] buf = new byte[16 * Constants.KB]; int got = is.read(buf); while (got != -1) { os.write(buf, 0, got); got = is.read(buf); } is.close(); } os.close(); if (!mCheckpointUnderFs.rename(midPath, dstPath)) { LOG.error("Failed to rename from " + midPath + " to " + dstPath); } mMasterClient.addCheckpoint(mWorkerId, fileId, fileSizeByte, dstPath); for (int k = 0; k < fileInfo.blockIds.size(); k++) { unlockBlock(fileInfo.blockIds.get(k), Users.sCHECKPOINT_USER_ID); } long shouldTakeMs = (long) (1000.0 * fileSizeByte / Constants.MB / WorkerConf.get().WORKER_PER_THREAD_CHECKPOINT_CAP_MB_SEC); long currentTimeMs = System.currentTimeMillis(); if (startCopyTimeMs + shouldTakeMs > currentTimeMs) { long shouldSleepMs = startCopyTimeMs + shouldTakeMs - currentTimeMs; LOG.info( "Checkpointed last file " + fileId + " took " + (currentTimeMs - startCopyTimeMs) + " 
ms. Need to sleep " + shouldSleepMs + " ms."); CommonUtils.sleepMs(LOG, shouldSleepMs); } } catch (FileDoesNotExistException e) { LOG.warn(e); } catch (SuspectedFileSizeException e) { LOG.error(e); } catch (BlockInfoException e) { LOG.error(e); } catch (IOException e) { LOG.error(e); } catch (TException e) { LOG.warn(e); } } }
@Test public void promoteBlock() throws Exception { TachyonFile file1 = TachyonFSTestUtils.createByteFile( mTFS, "/root/test1", TachyonStorageType.STORE, UnderStorageType.SYNC_PERSIST, MEM_CAPACITY_BYTES / 6); TachyonFile file2 = TachyonFSTestUtils.createByteFile( mTFS, "/root/test2", TachyonStorageType.STORE, UnderStorageType.SYNC_PERSIST, MEM_CAPACITY_BYTES / 2); TachyonFile file3 = TachyonFSTestUtils.createByteFile( mTFS, "/root/test3", TachyonStorageType.STORE, UnderStorageType.SYNC_PERSIST, MEM_CAPACITY_BYTES / 2); CommonUtils.sleepMs(LOG, mWorkerToMasterHeartbeatIntervalMs * 3); TachyonFile toPromote = null; int toPromoteLen = 0; FileInfo file1Info = mTFS.getInfo(file1); FileInfo file2Info = mTFS.getInfo(file2); FileInfo file3Info = mTFS.getInfo(file3); // We know some file will not be in memory, but not which one since we do not want to make // any assumptions on the eviction policy if (file1Info.getInMemoryPercentage() < 100) { toPromote = file1; toPromoteLen = (int) file1Info.getLength(); Assert.assertEquals(100, file2Info.getInMemoryPercentage()); Assert.assertEquals(100, file3Info.getInMemoryPercentage()); } else if (file2Info.getInMemoryPercentage() < 100) { toPromote = file2; toPromoteLen = (int) file2Info.getLength(); Assert.assertEquals(100, file1Info.getInMemoryPercentage()); Assert.assertEquals(100, file3Info.getInMemoryPercentage()); } else { toPromote = file3; toPromoteLen = (int) file3Info.getLength(); Assert.assertEquals(100, file1Info.getInMemoryPercentage()); Assert.assertEquals(100, file2Info.getInMemoryPercentage()); } FileInStream is = mTFS.getInStream( toPromote, new InStreamOptions.Builder(mWorkerConf) .setTachyonStorageType(TachyonStorageType.PROMOTE) .build()); byte[] buf = new byte[toPromoteLen]; int len = is.read(buf); is.close(); CommonUtils.sleepMs(LOG, mWorkerToMasterHeartbeatIntervalMs * 3); Assert.assertEquals(toPromoteLen, len); Assert.assertEquals(100, mTFS.getInfo(toPromote).getInMemoryPercentage()); }
public void start() throws IOException { int maxLevel = 1; mTachyonHome = File.createTempFile("Tachyon", "U" + System.currentTimeMillis()).getAbsolutePath(); mWorkerDataFolder = "/datastore"; mHostname = NetworkAddressUtils.getLocalHostName(100); mMasterConf = MasterContext.getConf(); mMasterConf.set(Constants.IN_TEST_MODE, "true"); mMasterConf.set(Constants.TACHYON_HOME, mTachyonHome); mMasterConf.set(Constants.USE_ZOOKEEPER, "true"); mMasterConf.set(Constants.MASTER_HOSTNAME, mHostname); mMasterConf.set(Constants.MASTER_BIND_HOST, mHostname); mMasterConf.set(Constants.MASTER_PORT, "0"); mMasterConf.set(Constants.MASTER_WEB_BIND_HOST, mHostname); mMasterConf.set(Constants.MASTER_WEB_PORT, "0"); mMasterConf.set(Constants.ZOOKEEPER_ADDRESS, mCuratorServer.getConnectString()); mMasterConf.set(Constants.ZOOKEEPER_ELECTION_PATH, "/election"); mMasterConf.set(Constants.ZOOKEEPER_LEADER_PATH, "/leader"); mMasterConf.set(Constants.USER_QUOTA_UNIT_BYTES, "10000"); mMasterConf.set(Constants.USER_DEFAULT_BLOCK_SIZE_BYTE, Integer.toString(mUserBlockSize)); // Since tests are always running on a single host keep the resolution timeout low as otherwise // people running with strange network configurations will see very slow tests mMasterConf.set(Constants.HOST_RESOLUTION_TIMEOUT_MS, "250"); // Disable hdfs client caching to avoid file system close() affecting other clients System.setProperty("fs.hdfs.impl.disable.cache", "true"); // re-build the dir to set permission to 777 deleteDir(mTachyonHome); mkdir(mTachyonHome); for (int k = 0; k < mNumOfMasters; k++) { final LocalTachyonMaster master = LocalTachyonMaster.create(mTachyonHome); master.start(); LOG.info( "master NO." 
+ k + " started, isServing: " + master.isServing() + ", address: " + master.getAddress()); mMasters.add(master); // Each master should generate a new port for binding mMasterConf.set(Constants.MASTER_PORT, "0"); } // Create the directories for the data and workers after LocalTachyonMaster construction, // because LocalTachyonMaster sets the UNDERFS_DATA_FOLDER and UNDERFS_WORKERS_FOLDER. mkdir(mMasterConf.get(Constants.UNDERFS_DATA_FOLDER)); mkdir(mMasterConf.get(Constants.UNDERFS_WORKERS_FOLDER)); LOG.info("all " + mNumOfMasters + " masters started."); LOG.info("waiting for a leader."); boolean hasLeader = false; while (!hasLeader) { for (int i = 0; i < mMasters.size(); i++) { if (mMasters.get(i).isServing()) { LOG.info( "master NO." + i + " is selected as leader. address: " + mMasters.get(i).getAddress()); hasLeader = true; break; } } } // Use first master port mMasterConf.set(Constants.MASTER_PORT, getMasterPort() + ""); CommonUtils.sleepMs(10); mWorkerConf = WorkerContext.getConf(); mWorkerConf.merge(mMasterConf); mWorkerConf.set(Constants.WORKER_DATA_FOLDER, mWorkerDataFolder); mWorkerConf.set(Constants.WORKER_MEMORY_SIZE, mWorkerCapacityBytes + ""); mWorkerConf.set(Constants.WORKER_TO_MASTER_HEARTBEAT_INTERVAL_MS, 15 + ""); // Setup conf for worker mWorkerConf.set(Constants.WORKER_MAX_TIERED_STORAGE_LEVEL, Integer.toString(maxLevel)); mWorkerConf.set(String.format(Constants.WORKER_TIERED_STORAGE_LEVEL_ALIAS_FORMAT, 0), "MEM"); mWorkerConf.set( String.format(Constants.WORKER_TIERED_STORAGE_LEVEL_DIRS_PATH_FORMAT, 0), mTachyonHome + "/ramdisk"); mWorkerConf.set( String.format(Constants.WORKER_TIERED_STORAGE_LEVEL_DIRS_QUOTA_FORMAT, 0), mWorkerCapacityBytes + ""); // Since tests are always running on a single host keep the resolution timeout low as otherwise // people running with strange network configurations will see very slow tests mWorkerConf.set(Constants.HOST_RESOLUTION_TIMEOUT_MS, "250"); for (int level = 1; level < maxLevel; level++) { String 
tierLevelDirPath = String.format(Constants.WORKER_TIERED_STORAGE_LEVEL_DIRS_PATH_FORMAT, level); String[] dirPaths = mWorkerConf.get(tierLevelDirPath).split(","); String newPath = ""; for (String dirPath : dirPaths) { newPath += mTachyonHome + dirPath + ","; } mWorkerConf.set( String.format(Constants.WORKER_TIERED_STORAGE_LEVEL_DIRS_PATH_FORMAT, level), newPath.substring(0, newPath.length() - 1)); } mWorkerConf.set(Constants.WORKER_BIND_HOST, mHostname); mWorkerConf.set(Constants.WORKER_PORT, "0"); mWorkerConf.set(Constants.WORKER_DATA_BIND_HOST, mHostname); mWorkerConf.set(Constants.WORKER_DATA_PORT, "0"); mWorkerConf.set(Constants.WORKER_WEB_BIND_HOST, mHostname); mWorkerConf.set(Constants.WORKER_WEB_PORT, "0"); mWorkerConf.set(Constants.WORKER_MIN_WORKER_THREADS, "1"); mWorkerConf.set(Constants.WORKER_MAX_WORKER_THREADS, "100"); // Perform immediate shutdown of data server. Graceful shutdown is unnecessary and slow mWorkerConf.set(Constants.WORKER_NETWORK_NETTY_SHUTDOWN_QUIET_PERIOD, Integer.toString(0)); mWorkerConf.set(Constants.WORKER_NETWORK_NETTY_SHUTDOWN_TIMEOUT, Integer.toString(0)); mWorker = new BlockWorker(); Runnable runWorker = new Runnable() { @Override public void run() { try { mWorker.process(); } catch (Exception e) { throw new RuntimeException(e + " \n Start Master Error \n" + e.getMessage(), e); } } }; mWorkerThread = new Thread(runWorker); mWorkerThread.start(); // The client context should reflect the updates to the conf. if (sReinitializer == null) { ClientContext.accessReinitializer(sReinitializerAccesser); } sReinitializer.reinitializeWithConf(mWorkerConf); }
public static void main(String[] args) { // Parse the input args. if (!parseInputArgs(args)) { System.exit(EXIT_FAILED); } System.out.println("Stop the current Tachyon cluster..."); stopCluster(); // Set NO_STORE and NO_PERSIST so that this test can work without TachyonWorker. sOutStreamOptions = new OutStreamOptions.Builder(new TachyonConf()) .setTachyonStorageType(TachyonStorageType.NO_STORE) .setUnderStorageType(UnderStorageType.NO_PERSIST) .build(); // Set the max retry to avoid long pending for client disconnect. if (System.getProperty(Constants.MASTER_RETRY_COUNT) == null) { System.setProperty(Constants.MASTER_RETRY_COUNT, "10"); } System.out.println("Start Journal Crash Test..."); long startTimeMs = System.currentTimeMillis(); boolean ret = true; startMaster(); int rounds = 0; while (System.currentTimeMillis() - startTimeMs < sTotalTimeMs) { rounds++; long aliveTimeMs = (long) (Math.random() * sMaxAliveTimeMs) + 100; LOG.info("Round {}: Planning Master Alive Time {}ms.", rounds, aliveTimeMs); System.out.println("Round " + rounds + " : Launch Clients..."); sTfs = TachyonFileSystemFactory.get(); sOldTfs = TachyonFS.get(ClientContext.getConf()); try { sTfs.delete(sTfs.open(new TachyonURI(sTestDir))); } catch (Exception ioe) { // Test Directory not exist } // Launch all the client threads. 
setupClientThreads(); for (Thread thread : sClientThreadList) { thread.start(); } CommonUtils.sleepMs(LOG, aliveTimeMs); System.out.println("Round " + rounds + " : Crash Master..."); killMaster(); for (ClientThread clientThread : sClientThreadList) { clientThread.setIsStopped(true); } for (Thread thread : sClientThreadList) { try { thread.join(); } catch (InterruptedException e) { LOG.error("Error when waiting thread", e); } } System.out.println("Round " + rounds + " : Check Status..."); startMaster(); boolean checkSuccess = false; try { checkSuccess = checkStatus(); } catch (Exception e) { LOG.error("Failed to check status", e); } Utils.printPassInfo(checkSuccess); ret &= checkSuccess; } stopCluster(); System.exit(ret ? EXIT_SUCCESS : EXIT_FAILED); }
@Override public void run() { LOG.info("{}: Journal tailer started.", mMaster.getServiceName()); // Continually loop loading the checkpoint file, and then loading all completed files. The loop // only repeats when the checkpoint file is updated after it was read. while (!mInitiateShutdown) { try { // The start time (ms) for the initiated shutdown. long waitForShutdownStart = -1; // Load the checkpoint file. LOG.info("{}: Waiting to load the checkpoint file.", mMaster.getServiceName()); mJournalTailer = new JournalTailer(mMaster, mJournal); while (!mJournalTailer.checkpointExists()) { CommonUtils.sleepMs(LOG, mJournalTailerSleepTimeMs); if (mInitiateShutdown) { LOG.info("Journal tailer has been shutdown while waiting to load the checkpoint file."); mStopped = true; return; } } LOG.info("{}: Start loading the checkpoint file.", mMaster.getServiceName()); mJournalTailer.processJournalCheckpoint(true); LOG.info("{}: Checkpoint file has been loaded.", mMaster.getServiceName()); // Continually process completed log files. while (mJournalTailer.isValid()) { if (mJournalTailer.processNextJournalLogFiles() > 0) { // Reset the shutdown timer. waitForShutdownStart = -1; } else { if (mInitiateShutdown) { if (waitForShutdownStart == -1) { waitForShutdownStart = CommonUtils.getCurrentMs(); } else if ((CommonUtils.getCurrentMs() - waitForShutdownStart) > mShutdownQuietWaitTimeMs) { // There have been no new logs for the quiet period. Shutdown now. LOG.info( "{}: Journal tailer has been shutdown. No new logs for the quiet period.", mMaster.getServiceName()); mStopped = true; return; } } LOG.debug( "{}: The next complete log file does not exist yet. " + "Sleeping and checking again.", mMaster.getServiceName()); CommonUtils.sleepMs(LOG, mJournalTailerSleepTimeMs); } } LOG.info( "{}: The checkpoint is out of date. 
Will reload the checkpoint file.", mMaster.getServiceName()); CommonUtils.sleepMs(LOG, mJournalTailerSleepTimeMs); } catch (IOException ioe) { // Log the error and continue the loop. LOG.error(ioe.getMessage()); } } LOG.info("{}: Journal tailer has been shutdown.", mMaster.getServiceName()); mStopped = true; }