public static void main(String[] args) throws Exception { if (args.length < 1) { System.err.println("Need to give a filename"); System.exit(1); } HSLFSlideShow ss = new HSLFSlideShow(args[0]); // Find the documents, and then their SLWT Record[] records = ss.getRecords(); for (int i = 0; i < records.length; i++) { if (records[i] instanceof Document) { Record docRecord = records[i]; Record[] docChildren = docRecord.getChildRecords(); for (int j = 0; j < docChildren.length; j++) { if (docChildren[j] instanceof SlideListWithText) { System.out.println("Found SLWT at pos " + j + " in the Document at " + i); System.out.println(" Has " + docChildren[j].getChildRecords().length + " children"); // Grab the SlideAtomSet's, which contain // a SlidePersistAtom and then a bunch of text // + related records SlideListWithText slwt = (SlideListWithText) docChildren[j]; SlideListWithText.SlideAtomsSet[] thisSets = slwt.getSlideAtomsSets(); System.out.println(" Has " + thisSets.length + " AtomSets in it"); // Loop over the sets, showing what they contain for (int k = 0; k < thisSets.length; k++) { SlidePersistAtom spa = thisSets[k].getSlidePersistAtom(); System.out.println(" " + k + " has slide id " + spa.getSlideIdentifier()); System.out.println(" " + k + " has ref id " + spa.getRefID()); // Loop over the records, printing the text Record[] slwtc = thisSets[k].getSlideRecords(); for (int l = 0; l < slwtc.length; l++) { String text = null; if (slwtc[l] instanceof TextBytesAtom) { TextBytesAtom tba = (TextBytesAtom) slwtc[l]; text = tba.getText(); } if (slwtc[l] instanceof TextCharsAtom) { TextCharsAtom tca = (TextCharsAtom) slwtc[l]; text = tca.getText(); } if (text != null) { text = text.replace('\r', '\n'); System.out.println(" ''" + text + "''"); } } } } } } } }
/** * Use the PersistPtrHolder entries to figure out what is the "most recent" version of all the * core records (Document, Notes, Slide etc), and save a record of them. Do this by walking from * the oldest PersistPtr to the newest, overwriting any references found along the way with newer * ones */ private void findMostRecentCoreRecords() { // To start with, find the most recent in the byte offset domain Map<Integer, Integer> mostRecentByBytes = new HashMap<Integer, Integer>(); for (Record record : _hslfSlideShow.getRecords()) { if (record instanceof PersistPtrHolder) { PersistPtrHolder pph = (PersistPtrHolder) record; // If we've already seen any of the "slide" IDs for this // PersistPtr, remove their old positions int[] ids = pph.getKnownSlideIDs(); for (int id : ids) { if (mostRecentByBytes.containsKey(id)) { mostRecentByBytes.remove(id); } } // Now, update the byte level locations with their latest values Map<Integer, Integer> thisSetOfLocations = pph.getSlideLocationsLookup(); for (int id : ids) { mostRecentByBytes.put(id, thisSetOfLocations.get(id)); } } } // We now know how many unique special records we have, so init // the array _mostRecentCoreRecords = new Record[mostRecentByBytes.size()]; // We'll also want to be able to turn the slide IDs into a position // in this array _sheetIdToCoreRecordsLookup = new HashMap<Integer, Integer>(); Integer[] allIDs = mostRecentByBytes.keySet().toArray(new Integer[mostRecentByBytes.size()]); Arrays.sort(allIDs); for (int i = 0; i < allIDs.length; i++) { _sheetIdToCoreRecordsLookup.put(allIDs[i], i); } Map<Integer, Integer> mostRecentByBytesRev = new HashMap<Integer, Integer>(mostRecentByBytes.size()); for (Map.Entry<Integer, Integer> me : mostRecentByBytes.entrySet()) { mostRecentByBytesRev.put(me.getValue(), me.getKey()); } // Now convert the byte offsets back into record offsets for (Record record : _hslfSlideShow.getRecords()) { if (!(record instanceof PositionDependentRecord)) continue; PositionDependentRecord pdr 
= (PositionDependentRecord) record; int recordAt = pdr.getLastOnDiskOffset(); Integer thisID = mostRecentByBytesRev.get(recordAt); if (thisID == null) continue; // Bingo. Now, where do we store it? int storeAt = _sheetIdToCoreRecordsLookup.get(thisID); // Tell it its Sheet ID, if it cares if (pdr instanceof PositionDependentRecordContainer) { PositionDependentRecordContainer pdrc = (PositionDependentRecordContainer) record; pdrc.setSheetId(thisID); } // Finally, save the record _mostRecentCoreRecords[storeAt] = record; } // Now look for the interesting records in there for (Record record : _mostRecentCoreRecords) { // Check there really is a record at this number if (record != null) { // Find the Document, and interesting things in it if (record.getRecordType() == RecordTypes.Document.typeID) { _documentRecord = (Document) record; _fonts = _documentRecord.getEnvironment().getFontCollection(); } } else { // No record at this number // Odd, but not normally a problem } } }
/**
 * Writes the given record into an in-memory buffer and returns the bytes
 * that were written.
 *
 * @param r the record to write out
 * @return the bytes produced by {@code r.writeOut}
 * @throws Exception if writing the record out fails
 */
private byte[] writeRecord(Record r) throws Exception {
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    r.writeOut(buffer);
    final byte[] written = buffer.toByteArray();
    return written;
}