@Override public void initialize(UimaContext context) throws ResourceInitializationException { if (revisionIdFile != null || revisionIdParamArray != null) { this.getLogger() .log( Level.WARNING, "Reading a predefined list of revisions is currently not supported by the WikipediaRevisionPairReader. Falling back to reading ALL revisions."); revisionIdFile = null; revisionIdParamArray = null; // TODO add support for reading a defined set of revisions (like the // WikipediaRevisionReader) } super.initialize(context); savedTimestamp = null; nrOfRevisionsProcessed = 0; }
@Override public void getNext(JCas jcas) throws IOException, CollectionException { super.getNext(jcas); Timestamp currentTimestamp = timestampIter.next(); if (currentTimestamp == null) { throw new CollectionException( new Throwable("Current timestamp is null. Upps ... should not happen.")); } this.getLogger().log(Level.FINE, currentArticle.getPageId() + "-" + currentTimestamp); try { JCas revView1 = jcas.createView(REVISION_1); JCas revView2 = jcas.createView(REVISION_2); Revision revision1; Revision revision2; String text1 = ""; String text2 = ""; if (nrOfRevisionsProcessed < skipFirstNPairs) { if (nrOfRevisionsProcessed % 1000 == 0) { this.getLogger().log(Level.INFO, "Skipping " + nrOfRevisionsProcessed + "th revision."); } // create fake revisions revision1 = getRevision(null); revision2 = getRevision(null); } else { revision1 = getRevision(savedTimestamp); revision2 = getRevision(currentTimestamp); text1 = getText(revision1); text2 = getText(revision2); int difference = Math.abs(text1.length() - text2.length()); if (difference < minChange || difference > maxChange) { text1 = ""; text2 = ""; } } revView1.setDocumentText(text1); revView2.setDocumentText(text2); addDocumentMetaData(jcas, currentArticle.getPageId(), revision1.getRevisionID()); addDocumentMetaData(revView1, currentArticle.getPageId(), revision1.getRevisionID()); addDocumentMetaData(revView2, currentArticle.getPageId(), revision2.getRevisionID()); addRevisionAnnotation(revView1, revision1); addRevisionAnnotation(revView2, revision2); savedTimestamp = currentTimestamp; if (!timestampIter.hasNext()) { savedTimestamp = null; } nrOfRevisionsProcessed++; } catch (WikiApiException e) { throw new CollectionException(e); } catch (CASException e) { throw new CollectionException(e); } }