@Test public void testParse() throws Exception { // generate a sample JSON array StringBuilder sample = new StringBuilder(); sample.append("["); int i = 0; final int N = 5000; for (; i < N; i++) { sample.append(i); sample.append(","); } sample.append(i); sample.append("]"); String json = sample.toString(); StopWatch timer = new StopWatch(); for (int n = 0; n < 500; n++) { JSONPullParser parser = new JSONPullParser(json); JSONEvent e; while ((e = parser.next()) != JSONEvent.EndJSON) {} } _logger.info("time: " + timer.getElapsedTime()); }
@Test public void testLexerPerformance() throws Exception { // generate a sample JSON array StringBuilder sample = new StringBuilder(); sample.append("["); int i = 0; final int N = 5000; for (; i < N; i++) { sample.append(i); sample.append(","); } sample.append(i); sample.append("]"); String json = sample.toString(); StopWatch timer = new StopWatch(); for (int n = 0; n < 500; n++) { JSONLexer lexer = new JSONLexer(json); Token t; while ((t = lexer.nextToken()) != null) {} } _logger.info("time: " + timer.getElapsedTime()); }
/**
 * Logs the time {@code StringUtil.splitCSV} needs to split every line of the
 * {@code scaffold1.silk} test resource. Nothing is asserted; the split results
 * are discarded.
 *
 * @throws Exception if the resource cannot be opened or read
 */
@Test
public void performanceOfCSVSplit() throws Exception {
    final int N = 1;
    StopWatch s = new StopWatch();

    s.reset();
    for (int i = 0; i < N; i++) {
        BufferedReader br = new BufferedReader(
                new InputStreamReader(
                        FileResource.find(SilkWalkerTest.class, "scaffold1.silk").openStream()));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                StringUtil.splitCSV(line);
            }
        } finally {
            // Release the resource stream on every pass, even when reading or splitting fails
            br.close();
        }
    }
    _logger.info("OpenCSV split:" + s.getElapsedTime());
}
/**
 * Fills {@code SA} with the suffix array of the text {@code T} by induced sorting:
 * the LMS substrings are sorted and named, the reduced problem built from those
 * names is solved (recursively when the names are not unique), and the final
 * order is induced from the sorted LMS suffixes.
 * <p>
 * The value {@code N} is written into {@code SA} as the marker for an empty slot.
 * When the type of the last character is decided, text positions wrap around
 * ({@code % N}). The method works on {@code T}, {@code N}, {@code LS},
 * {@code bucketEnd}, {@code SType}, {@code LType}, {@code isLMS},
 * {@code isEqualLMSSubstring} and {@code induceSA}, all of which are defined
 * outside this method.
 *
 * @param SA output sequence; it is also used as working space for the LMS names
 *           and the reduced problem. Presumably it must hold at least N entries
 *           (TODO confirm against the callers).
 */
public void SAIS(LSeq SA) {
    StopWatch timer = new StopWatch();
    _logger.info("SAIS: N=" + SA.textSize());

    // Determine the LS-type of T[N-1]
    // T[i] is SType if T[i,_) < T[i+1,_)
    // T[i] is LType if T[i,_) > T[i+1,_)
    {
        // Compare T[N-1+i] with T[N+i] (indices taken modulo N) until the two differ
        long i = 0;
        for (; i < N; ++i) {
            long x = T.lookup((N + i - 1) % N);
            long y = T.lookup((N + i) % N);
            if (x == y)
                continue;
            if (x < y) {
                LS.set(N - 1, SType);
                break;
            } else {
                LS.set(N - 1, LType);
                break;
            }
        }
        if (i == N) {
            // No differing pair was found (T = AAAA..., etc.): fall back to LType
            LS.set(N - 1, LType);
        }
    }

    // T[i] is SType if T[i] < T[i+1] or T[i] = T[i+1] and T[i+1] is S-type
    // T[i] is LType if T[i] > T[i+1] or T[i] = T[i+1] and T[i+1] is L-type
    // Set the LS type of each character, scanning from right to left so that
    // the type of position i is already known when position i-1 is decided
    for (long i = N - 1; i > 0; --i) {
        long x = T.lookup(i);
        long y = T.lookup(i - 1);
        if (x < y)
            LS.set(i - 1, LType);
        else if (x > y)
            LS.set(i - 1, SType);
        else
            LS.set(i - 1, LS.get(i));
    }

    // Initialize the buckets.
    // A bucket is a container of the suffixes sharing the same first character
    {
        _logger.trace("Initialize the buckets");
        Arrays.fill(bucketEnd, 0);
        // Compute the size of each bucket
        for (long i = 0; i < N; ++i) {
            ++bucketEnd[(int) T.lookup(i)];
        }
        // Accumulate the character counts. Afterwards bucketEnd[c] is the
        // exclusive end position of the bucket of character c in SA
        for (int i = 1; i < bucketEnd.length; ++i) {
            bucketEnd[i] += bucketEnd[i - 1];
        }
        _logger.trace("Done.");
    }

    // Initialize the suffix array: N marks an empty slot
    for (long i = 0; i < N; ++i)
        SA.set(i, N);

    // Step 1: reduce the problem by at least 1/2
    // Sort all the S-substrings
    // Find LMS characters and place each at the current tail of its bucket
    long[] cursorInBucket = Arrays.copyOf(bucketEnd, bucketEnd.length);
    for (long i = 0; i < N; ++i) {
        if (isLMS(i))
            SA.set(--cursorInBucket[(int) T.lookup(i)], i);
    }

    // Induced sorting LMS prefixes
    {
        _logger.trace(String.format("[N=%,d] induceSA", SA.textSize()));
        induceSA(SA);
        _logger.trace("Done.");
    }

    int numLMS = 0;
    // Compact all the sorted substrings into the first M items of SA
    // 2*M must be not larger than N
    for (long i = 0; i < N; ++i) {
        long si = SA.lookup(i);
        if (si != N && isLMS(si))
            SA.set(numLMS++, si);
    }

    // Initialize the name array buffer (everything behind the compacted LMS positions)
    for (long i = numLMS; i < N; ++i)
        SA.set(i, N);

    // Find the lexicographic names of the LMS substrings.
    // The name of the LMS substring starting at text position p is stored at
    // SA[numLMS + p/2]; names start at 1 and grow whenever two neighbouring
    // LMS substrings (in sorted order) differ
    _logger.trace("Sorting LMS substrings: N=" + SA.textSize());
    int name = 1;
    SA.set(numLMS + (SA.lookup(0) / 2), name++);
    for (long i = 1; i < numLMS; ++i) {
        final long prev = SA.lookup(i - 1);
        final long current = SA.lookup(i);
        if (!isEqualLMSSubstring(prev, current)) {
            name++;
        }
        SA.set(numLMS + (current / 2), name - 1);
    }

    // Pack the assigned names to the tail of SA, keeping their order and
    // shifting them to start at 0 (name - 1)
    for (long i = N - 1, j = N - 1; i >= numLMS; --i) {
        if (SA.lookup(i) != N)
            SA.set(j--, SA.lookup(i) - 1);
    }

    // Step 2: solve the reduced problem
    // Create SA1, a view of SA[0, numLMS-1], and T1, a view of the packed
    // names in the last numLMS slots of SA
    _logger.trace("Solving the reduced problem: N=" + SA.textSize());
    LSeq SA1 = new ArrayWrap(SA, 0, numLMS);
    LSeq T1 = new ArrayWrap(SA, N - numLMS, numLMS);
    if (name - 1 != numLMS) {
        // Some LMS substrings share a name: sort the reduced text recursively
        new CyclicSAIS(T1, name - 1).SAIS(SA1);
    } else {
        // When all LMS substrings have unique names
        for (long i = 0; i < numLMS; i++)
            SA1.set(T1.lookup(i), i);
    }

    // Step 3: Induce SA from SA1
    // Construct P1 using T1 buffer: T1[j] = text position of the j-th LMS character
    for (long i = 0, j = 0; i < N; ++i) {
        if (isLMS(i))
            T1.set(j++, i);
    }

    // Translate short name into the original index at T
    // SA1 now holds the LMS-substring indexes
    for (long i = 0; i < numLMS; ++i) {
        SA1.set(i, T1.lookup(SA1.lookup(i)));
    }

    // Step 3-1: Put all the items in SA1 into corresponding S-type buckets of SA
    // Clear SA[N1 .. N-1]
    for (long i = numLMS; i < N; ++i) {
        SA.set(i, N);
    }

    // Put SA1 contents into S-type buckets of SA.
    // SA1 is walked from its last entry to its first; each slot is emptied
    // before its value is written at the current tail of the value's bucket
    System.arraycopy(bucketEnd, 0, cursorInBucket, 0, bucketEnd.length);
    for (int i = numLMS - 1; i >= 0; --i) {
        long si = SA1.lookup(i);
        SA.set(i, N);
        long index = --cursorInBucket[(int) T.lookup(si)];
        SA.set(index, si);
    }

    // Disabled statement kept from the original source:
    // SA.set(0, T.textSize() - 1);

    // Step 3-2, 3-3
    _logger.trace("Inducing SA from SA1: N=" + SA.textSize());
    induceSA(SA);

    _logger.info(String.format("done. %.2f sec.", timer.getElapsedTime()));
}
/**
 * Logs how long four ways of splitting the lines of the {@code scaffold1.silk}
 * test resource at tab characters take: a compiled {@code Pattern},
 * {@code StringUtil.splitAtTab}, {@code StringTokenizer}, and
 * {@code StringUtil.splitAtTab} fed by a {@code FastBufferedReader}.
 * Nothing is asserted; the split results are discarded.
 *
 * @throws Exception if the resource cannot be opened or read
 */
@Test
public void performanceOfTabSplit() throws Exception {
    StopWatch s = new StopWatch();
    final int N = 1;
    Pattern p = Pattern.compile("\t");

    // Variant 1: java.util.regex.Pattern#split
    s.reset();
    for (int i = 0; i < N; i++) {
        BufferedReader br = new BufferedReader(
                new InputStreamReader(
                        FileResource.find(SilkWalkerTest.class, "scaffold1.silk").openStream()));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                p.split(line);
            }
        } finally {
            // Release the resource stream even if reading fails
            br.close();
        }
    }
    _logger.info("default tab split:" + s.getElapsedTime());

    // Variant 2: StringUtil.splitAtTab
    s.reset();
    for (int i = 0; i < N; i++) {
        BufferedReader br = new BufferedReader(
                new InputStreamReader(
                        FileResource.find(SilkWalkerTest.class, "scaffold1.silk").openStream()));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                StringUtil.splitAtTab(line);
            }
        } finally {
            br.close();
        }
    }
    _logger.info("my tab split:" + s.getElapsedTime());

    // Variant 3: StringTokenizer, collecting the tokens into a list
    s.reset();
    for (int i = 0; i < N; i++) {
        BufferedReader br = new BufferedReader(
                new InputStreamReader(
                        FileResource.find(SilkWalkerTest.class, "scaffold1.silk").openStream()));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                StringTokenizer t = new StringTokenizer(line, "\t");
                ArrayList<String> tokens = new ArrayList<String>();
                while (t.hasMoreTokens()) {
                    tokens.add(t.nextToken());
                }
            }
        } finally {
            br.close();
        }
    }
    _logger.info("tokenizer tab split:" + s.getElapsedTime());

    // Variant 4: StringUtil.splitAtTab on lines delivered by FastBufferedReader
    s.reset();
    for (int i = 0; i < N; i++) {
        InputStreamReader source = new InputStreamReader(
                FileResource.find(SilkWalkerTest.class, "scaffold1.silk").openStream());
        try {
            FastBufferedReader fb = new FastBufferedReader(source);
            String line;
            while ((line = fb.readLine()) != null) {
                StringUtil.splitAtTab(line);
            }
        } finally {
            // Close the underlying reader directly; it owns the resource stream
            source.close();
        }
    }
    _logger.info("fast reader:" + s.getElapsedTime());
}
/**
 * Logs the time several reader and stream implementations need to read a temporary
 * copy of the {@code scaffold1.silk} test resource, each variant reading the file
 * {@code N} times. Nothing is asserted.
 * <p>
 * Every file handle opened here is closed in a {@code finally} block. Where the
 * file is wrapped by a project class, the underlying JDK stream or reader is the
 * one that gets closed.
 * <p>
 * NOTE: in the block-reading variants a CR LF pair that is split across two
 * consecutive reads is counted as two line breaks; those counts are not reported.
 *
 * @throws Exception if the resource cannot be copied, opened or read
 */
@Test
public void perfTest() throws Exception {
    StopWatch s = new StopWatch();
    File in = FileResource.copyToTemp(SilkWalkerTest.class, "scaffold1.silk", new File("target"));
    final int N = 2;

    // BufferedReader, block reads into a char buffer
    {
        s.reset();
        int lineCount = 0;
        char[] buf = new char[8192];
        for (int i = 0; i < N; i++) {
            BufferedReader fb = new BufferedReader(new FileReader(in));
            try {
                for (int readSize = 0; (readSize = fb.read(buf)) != -1;) {
                    for (int c = 0; c < readSize; c++) {
                        if (buf[c] == '\r') {
                            lineCount++;
                            // Treat CR LF inside one buffer as a single line break
                            if (c + 1 < readSize && buf[c + 1] == '\n') {
                                c++;
                            }
                        } else if (buf[c] == '\n') {
                            lineCount++;
                        }
                    }
                }
            } finally {
                fb.close();
            }
        }
        _logger.info(String.format("BufferedReader: %.2f", s.getElapsedTime()));
    }

    // BufferedScanner over an InputStream, line by line
    {
        s.reset();
        int lineCount = 0;
        for (int i = 0; i < N; i++) {
            FileInputStream source = new FileInputStream(in);
            try {
                BufferedScanner fb = new BufferedScanner(source);
                for (CharSequence line; (line = fb.nextLine()) != null; lineCount++) {
                }
            } finally {
                source.close();
            }
        }
        _logger.info(
                String.format("BufferedScanner nextLine: %.2f, line:%d", s.getElapsedTime(), lineCount));
    }

    // BufferedReader, line by line
    {
        s.reset();
        int lineCount = 0;
        for (int i = 0; i < N; i++) {
            BufferedReader fb = new BufferedReader(new FileReader(in));
            try {
                for (String line; (line = fb.readLine()) != null; lineCount++) {
                }
            } finally {
                fb.close();
            }
        }
        _logger.info(
                String.format("BufferedReader readLine: %.2f, line:%d", s.getElapsedTime(), lineCount));
    }

    // BufferedScanner over an InputStream, converting every line to a String
    {
        s.reset();
        int lineCount = 0;
        for (int i = 0; i < N; i++) {
            FileInputStream source = new FileInputStream(in);
            try {
                BufferedScanner fb = new BufferedScanner(source);
                for (CharSequence line; (line = fb.nextLine()) != null; lineCount++) {
                    line.toString();
                }
            } finally {
                source.close();
            }
        }
        _logger.info(
                String.format(
                        "BufferedScanner nextLine (with String conversion): %.2f, line:%d",
                        s.getElapsedTime(), lineCount));
    }

    // BufferedScanner over a Reader, line by line
    {
        s.reset();
        int lineCount = 0;
        for (int i = 0; i < N; i++) {
            FileReader source = new FileReader(in);
            try {
                BufferedScanner fb = new BufferedScanner(source);
                for (CharSequence line; (line = fb.nextLine()) != null; lineCount++) {
                }
            } finally {
                source.close();
            }
        }
        _logger.info(
                String.format(
                        "BufferedScanner nextLine with Reader input: %.2f, line:%d",
                        s.getElapsedTime(), lineCount));
    }

    // FastBufferedReader, block reads into a char buffer
    {
        s.reset();
        int lineCount = 0;
        char[] buf = new char[8192];
        for (int i = 0; i < N; i++) {
            FileReader source = new FileReader(in);
            try {
                FastBufferedReader fb = new FastBufferedReader(source);
                for (int readSize = 0; (readSize = fb.read(buf)) != -1;) {
                    for (int c = 0; c < readSize; c++) {
                        if (buf[c] == '\r') {
                            lineCount++;
                            if (c + 1 < readSize && buf[c + 1] == '\n') {
                                c++;
                            }
                        } else if (buf[c] == '\n') {
                            lineCount++;
                        }
                    }
                }
            } finally {
                source.close();
            }
        }
        _logger.info(String.format("FastBufferedReader: %.2f", s.getElapsedTime()));
    }

    // FastBufferedInputStream, block reads into a byte buffer
    {
        s.reset();
        int lineCount = 0;
        byte[] buf = new byte[8192];
        for (int i = 0; i < N; i++) {
            FileInputStream source = new FileInputStream(in);
            try {
                FastBufferedInputStream fb = new FastBufferedInputStream(source);
                for (int readSize = 0; (readSize = fb.read(buf)) != -1;) {
                    for (int c = 0; c < readSize; c++) {
                        if (buf[c] == '\r') {
                            lineCount++;
                            if (c + 1 < readSize && buf[c + 1] == '\n') {
                                c++;
                            }
                        } else if (buf[c] == '\n') {
                            lineCount++;
                        }
                    }
                }
            } finally {
                source.close();
            }
        }
        _logger.info(String.format("FastBufferedInputStream: %.2f", s.getElapsedTime()));
    }

    // java.io.BufferedInputStream, block reads into a byte buffer
    {
        s.reset();
        int lineCount = 0;
        byte[] buf = new byte[8192];
        for (int i = 0; i < N; i++) {
            BufferedInputStream fb = new BufferedInputStream(new FileInputStream(in));
            try {
                for (int readSize = 0; (readSize = fb.read(buf)) != -1;) {
                    for (int c = 0; c < readSize; c++) {
                        if (buf[c] == '\r') {
                            lineCount++;
                            if (c + 1 < readSize && buf[c + 1] == '\n') {
                                c++;
                            }
                        } else if (buf[c] == '\n') {
                            lineCount++;
                        }
                    }
                }
            } finally {
                fb.close();
            }
        }
        _logger.info(String.format("BufferedInputStream: %.2f", s.getElapsedTime()));
    }
}