/**
 * Runs a record-counting {@link ExtractCDXFromWarcJob} through {@code warcBlaf},
 * writing the job output to {@code TestInfo.CDX_FILE}.
 *
 * <p>The anonymous subclass forwards every record to the superclass with a
 * throw-away buffer and increments {@code processed} once per record.
 *
 * @throws Exception if the output file cannot be opened or the run fails.
 */
public void testWarcIteration() throws Exception {
    warcJob = new ExtractCDXFromWarcJob() {
        public void processRecord(ArchiveRecord sar, OutputStream os) {
            // Per-record output is discarded; only the visit is counted.
            super.processRecord(sar, new ByteArrayOutputStream());
            processed++;
        }
    };
    assertFalse("The to-be-generated file should not exist aforehand", TestInfo.CDX_FILE.exists());
    OutputStream os = new FileOutputStream(TestInfo.CDX_FILE);
    try {
        warcBlaf.run(warcJob, os);
    } finally {
        // Release the file handle even when the run throws.
        os.close();
    }
}
public void testExtractCDXJobWithWarcfilesIncludeChecksum() throws Exception { warcJob = new ExtractCDXFromWarcJob(true); OutputStream os = new ByteArrayOutputStream(); assertFalse("The to-be-generated file should not exist aforehand", TestInfo.CDX_FILE.exists()); os = new FileOutputStream(TestInfo.CDX_FILE); warcBlaf.run(warcJob, os); os.close(); List<ExceptionOccurrence> exceptions = warcJob.getExceptions(); for (ExceptionOccurrence eo : exceptions) { System.out.println("Exception: " + eo.getException()); } // assertFalse(warcJob.getExceptions().isEmpty()); System.out.println(FileUtils.readFile(TestInfo.CDX_FILE)); }
/**
 * Verify that the job runs without problems and visits all relevant records.
 *
 * <p>The anonymous subclass forwards every record to the superclass with a
 * throw-away buffer and increments {@code processed}; afterwards the test
 * asserts that the job recorded no exceptions and that {@code processed}
 * equals {@code TestInfo.NUM_RECORDS}.
 *
 * @throws IOException if the temporary output file cannot be opened or closed.
 */
public void testRun() throws IOException {
    warcJob = new ExtractCDXFromWarcJob() {
        public void processRecord(ArchiveRecord sar, OutputStream os) {
            // Per-record output is discarded; only the visit is counted.
            super.processRecord(sar, new ByteArrayOutputStream());
            processed++;
        }
    };
    OutputStream os = new FileOutputStream(TestInfo.TEMP_FILE);
    try {
        warcBlaf.run(warcJob, os);
    } finally {
        // Release the file handle even when the run throws.
        os.close();
    }

    Exception[] es = warcJob.getExceptionArray();
    printExceptions(es);
    assertEquals("No exceptions should be thrown", 0, es.length);
    assertEquals(
            "The correct number of records should be processed",
            TestInfo.NUM_RECORDS,
            processed);
}
/** Test whether the class is really Serializable. */ public void testSerializability() throws IOException, ClassNotFoundException { // Take two jobs: one for study and one for reference. ExtractCDXFromWarcJob job1 = new StubbornJob(); ExtractCDXFromWarcJob job2 = new StubbornJob(); // Now serialize and deserialize the studied job (but NOT the reference): ByteArrayOutputStream baos = new ByteArrayOutputStream(); ObjectOutputStream ous = new ObjectOutputStream(baos); ous.writeObject(job1); ous.close(); baos.close(); ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray())); job1 = (ExtractCDXFromWarcJob) ois.readObject(); // Finally, compare their outputs: ByteArrayOutputStream baos1 = new ByteArrayOutputStream(); ByteArrayOutputStream baos2 = new ByteArrayOutputStream(); // Run both jobs ordinarily: warcBlaf.run(job1, baos1); warcBlaf.run(job2, baos2); baos1.close(); baos2.close(); byte[] b1 = baos1.toByteArray(); byte[] b2 = baos2.toByteArray(); for (int i = 0; i < b1.length; ++i) { if (b1[i] != b2[i]) { System.out.println("b1=" + b1[i]); System.out.println("b2=" + b2[i]); fail("\nDifference at position " + i); } else { // System.out.println(b1[i]); } } assertTrue( "Output from cdx jobs should be the same", Arrays.equals(baos1.toByteArray(), baos2.toByteArray())); }