private void writeRecordToFile(WikiPage wikiPage, int cnIndex) { // Simplest approach, prior to any data processing: // Just get the whole {{}}, and the previous text delimited by . and . String pageText = wikiPage.getWikiText(); try { // pageName // NOTE: titles seem to include a new line delimiter, so it is stripped. String pageName = wikiPage.getTitle().replace("\n", "").replace(",", "").trim(); int secondIndex = pageText.indexOf("}}", cnIndex); String tagContents = pageText.substring(cnIndex, secondIndex + 2); tagContents = tagContents.replace("\n", "").replace(",", "").trim(); int precedingStartingIndex = cnIndex - PRECEDING_SENTENCE_LENGTH; String precedingSentence; if (precedingStartingIndex < 0) { System.out.println( "cnIndex was " + cnIndex + " so I saved precedingSentence from 0 to cnIndex." + " The page was " + pageName + "."); precedingSentence = pageText.substring(0, cnIndex); } else { precedingSentence = pageText.substring(precedingStartingIndex, cnIndex); precedingSentence = precedingSentence.replace("\n", "").replace(",", "").trim(); } fileWriter.append(pageName); fileWriter.append(COMMA_DELIMITER); fileWriter.append(tagContents); fileWriter.append(COMMA_DELIMITER); fileWriter.append(precedingSentence); fileWriter.append(NEW_LINE_SEPARATOR); } catch (Exception e) { System.out.println( "Error! We caught an exception. cnIndex was " + cnIndex + " on page " + wikiPage.getTitle() + "."); // e.printStackTrace(); } }