public static TaggedDocument readTaggedDocument(String id, String dir, boolean printData) {
  // Loads the serialized TaggedDocument stored at dir + id + ".ser".
  // Returns null when the file cannot be opened/read or the class cannot be resolved.
  TaggedDocument td = null;
  // Track the raw file stream separately: the ObjectInputStream constructor reads the stream
  // header and may throw, in which case the wrapper never exists and cannot close the file.
  FileInputStream fileIn = null;
  try {
    fileIn = new FileInputStream(dir + id + ".ser");
    ObjectInput inputObject = new ObjectInputStream(new BufferedInputStream(fileIn));
    try {
      td = (TaggedDocument) inputObject.readObject();
    } catch (ClassNotFoundException e) {
      td = null;
      Logger.logln("Couldn't load TaggedDocument: " + id + ", from: " + dir);
      e.printStackTrace();
    } finally {
      inputObject.close();
    }
  } catch (IOException e) {
    // FileNotFoundException is an IOException; both were handled identically.
    e.printStackTrace();
  } finally {
    if (fileIn != null) {
      try {
        // Harmless if the wrapper already closed it.
        fileIn.close();
      } catch (IOException ignored) {
        // Nothing useful can be done if closing fails here.
      }
    }
  }
  if (printData && td != null) {
    System.out.println(td.toString());
  }
  return td;
}
/**
 * Reads a saved serialized HashMap of TreeData objects from the file {@code dir + id + ".ser"}
 * and returns it.
 *
 * @param id name of the HashMap (file name without the ".ser" extension)
 * @param dir location of the saved .ser file; it is concatenated directly with 'id'
 * @param printData if true, will print the loaded HashMap (via its toString method)
 * @return HashMap of TreeData objects specified by 'id' and 'dir', or null if the file could not
 *     be opened/read or a class in the stream could not be resolved
 */
@SuppressWarnings("unchecked")
public static HashMap<String, TreeData> readTreeDataMap(
    String id, String dir, boolean printData) {
  HashMap<String, TreeData> tdHash = null;
  // Track the raw file stream separately: the ObjectInputStream constructor reads the stream
  // header and may throw, in which case the wrapper never exists and cannot close the file.
  FileInputStream fileIn = null;
  try {
    fileIn = new FileInputStream(dir + id + ".ser");
    ObjectInput inputObject = new ObjectInputStream(new BufferedInputStream(fileIn));
    try {
      tdHash = (HashMap<String, TreeData>) inputObject.readObject();
    } catch (ClassNotFoundException e) {
      tdHash = null;
      Logger.logln("Couldn't load HashMap<String, TreeData>: " + id + ", from: " + dir);
      e.printStackTrace();
    } finally {
      inputObject.close();
    }
  } catch (IOException e) {
    // FileNotFoundException is an IOException; both were handled identically.
    e.printStackTrace();
  } finally {
    if (fileIn != null) {
      try {
        // Harmless if the wrapper already closed it.
        fileIn.close();
      } catch (IOException ignored) {
        // Nothing useful can be done if closing fails here.
      }
    }
  }
  if (printData && tdHash != null) {
    System.out.println(tdHash.toString());
  }
  return tdHash;
}
/**
 * Generic object writer. Serializes 'o' to the file {@code dir + id + ".ser"} and prints the
 * target path to standard output before writing.
 *
 * @param o the Object to write
 * @param id name of object (file name without the ".ser" extension)
 * @param dir directory to write the object to; it is concatenated directly with 'id'
 * @return true if no errors, false if the file could not be opened or written
 */
public static boolean writeObject(Object o, String id, String dir) {
  String path = dir + id + ".ser";
  System.out.println("Place to write: " + path);
  // Track the raw file stream separately so it is released even if wrapping it fails.
  FileOutputStream fileOut = null;
  try {
    fileOut = new FileOutputStream(path);
    ObjectOutputStream outObject =
        new ObjectOutputStream(new BufferedOutputStream(fileOut));
    try {
      outObject.writeObject(o);
    } finally {
      outObject.close();
    }
  } catch (IOException e) {
    // FileNotFoundException is an IOException; both were handled identically.
    // String.valueOf avoids a NullPointerException in the error path when 'o' is null.
    Logger.logln("ERROR saving object: " + String.valueOf(o));
    e.printStackTrace();
    return false;
  } finally {
    if (fileOut != null) {
      try {
        // Harmless if the wrapper already closed it.
        fileOut.close();
      } catch (IOException ignored) {
        // Nothing useful can be done if closing fails here.
      }
    }
  }
  return true;
}
/**
 * Shows a yes/no confirmation dialog reporting a fatal error. If the user picks "yes", the
 * termination request is logged to STDERR and the JVM exits with status FATAL_ERROR; any other
 * answer (including closing the dialog) returns to the caller.
 */
public static void fatalError() {
  final String message =
      "Anonymouth has encountered a fatal error.\n"
          + "The best thing to do is to examine the stack trace, and terminate the program.\n"
          + "If you would like to save your work/problem set prior to closing Anonymouth,\n"
          + "click \"no\" below. Otherwise, click \"yes\" (this will close Anonymouth)";
  final String title = "Fatal error encountered. Terminate?";

  final int choice =
      JOptionPane.showConfirmDialog(null, message, title, JOptionPane.YES_NO_OPTION);

  // Only an explicit "yes" terminates the program.
  if (choice != JOptionPane.YES_OPTION) {
    return;
  }
  Logger.logln("Fatal error encountered, termination requested.", LogOut.STDERR);
  System.exit(FATAL_ERROR);
}
public static Document removeUnicodeControlChars(Document dirtyDoc) { String newFile = "./temp/" + dirtyDoc.getTitle(); Document cleanDoc = new Document(); try { dirtyDoc.load(); cleanDoc.setText( (dirtyDoc.stringify()).replaceAll("\\p{C}&&[^\\t\\n\\r]", " ").toCharArray()); cleanDoc.setAuthor(dirtyDoc.getAuthor()); cleanDoc.setTitle(dirtyDoc.getTitle()); FileWriter fw = new FileWriter(new File("./temp/" + dirtyDoc.getTitle())); return cleanDoc; } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); Logger.logln( "ERROR! Could not load document: " + dirtyDoc.getTitle() + " (SentenceTools.removeUnicodeControlChars)"); return dirtyDoc; } }
/** * Takes a text (one String representing an entire document), and breaks it up into sentences. * Tries to find true ends of sentences: shouldn't break up sentences containing quoted sentences, * checks for sentences ending in a quoted sentence (e.x. He said, "Hello." ), will not break * sentences containing common abbreviations (such as Dr., Mr. U.S., etc.,e.x., i.e., and others), * and checks for ellipses points. However, It is probably not perfect. * * @param text * @return */ public ArrayList<String> makeSentenceTokens(String text) { ArrayList<String> sents = new ArrayList<String>(MAX_SENTENCES); boolean merge1 = false, mergeFinal = false; int currentStart = 1; int currentStop = 0; int lenText = text.length(); String temp; int openingQuoteIndex = 0; int closingQuoteIndex = 0; text = text.replaceAll("\u201C", "\""); text = text.replaceAll("\u201D", "\""); text = text.replaceAll("\\p{C}&&[^\\t\\n\\r]", " "); int notEOSNumber = 0; int numNotEOS = notEndsOfSentence.length; String replacementString = ""; String safeString = ""; for (notEOSNumber = 0; notEOSNumber < numNotEOS; notEOSNumber++) { replacementString = notEndsOfSentence[notEOSNumber].replaceAll("\\.", PERIOD_REPLACEMENT); // System.out.println("REPLACEMENT: "+replacementString); safeString = notEndsOfSentence[notEOSNumber].replaceAll("\\.", "\\\\."); // System.out.println(safeString); text = text.replaceAll("\\b(?i)" + safeString, replacementString); } Matcher sent = EOS_chars.matcher(text); boolean foundEOS = sent.find(currentStart); Matcher sentEnd; int charNum = 0; int lenString = 0; int lastQuoteAt = 0; boolean foundQuote = false; boolean isSentence; while (foundEOS == true) { currentStop = sent.end(); // System.out.println("Start: "+currentStart+" and Stop: "+currentStop); temp = text.substring(currentStart - 1, currentStop); // System.out.println(temp); lenString = temp.length(); lastQuoteAt = 0; foundQuote = false; for (charNum = 0; charNum < lenString; charNum++) { if (temp.charAt(charNum) == 
'\"') { lastQuoteAt = charNum; if (foundQuote == true) { foundQuote = false; } else { foundQuote = true; } // System.out.println("Found quote!!! here it is: "+temp.charAt(charNum)+" ... in // position: "+lastQuoteAt+" ... foundQuote is: "+foundQuote); } } if (foundQuote == true && ((closingQuoteIndex = temp.indexOf("\"", lastQuoteAt + 1)) == -1)) { if ((currentStop = text.indexOf("\"", currentStart + lastQuoteAt + 1)) == -1) { currentStop = text.length(); } else { currentStop += 1; merge1 = true; } } safeString = text.substring(currentStart - 1, currentStop); sentEnd = sentence_quote.matcher(safeString); isSentence = sentEnd.find(); // System.out.println("RESULT OF sentence_quote matching: "+isSentence); if (isSentence == true) { // If it seems that the text looks like this: He said, "Hello." Then she said, // "Hi." // Then we want to split this up into two sentences (it's possible to have a sentence like // this: He said, "Hello.") // System.out.println("start: "+sentEnd.start()+" ... end: "+sentEnd.end()); currentStop = text.indexOf("\"", sentEnd.start() + currentStart) + 1; safeString = text.substring(currentStart - 1, currentStop); } safeString = safeString.replaceAll(PERIOD_REPLACEMENT, "."); // System.out.println(safeString); if (mergeFinal) { mergeFinal = false; String prev = sents.remove(sents.size() - 1); safeString = prev + safeString; } if (merge1) { // makes so that the merge happens on the next pass through merge1 = false; mergeFinal = true; } sents.add(safeString); // System.out.println("start minus one: "+(currentStart-1)+" stop: "+currentStop); if (currentStart < 0 || currentStop < 0) { Logger.logln("Something went really wrong making sentence tokens."); System.exit(0); } // System.out.println("The rest of the text: "+text.substring(currentStart)); currentStart = currentStop + 1; if (currentStart >= lenText) { foundEOS = false; continue; } foundEOS = sent.find(currentStart); } return sents; }