Beispiel #1
0
 /**
  * Reads a serialized TaggedDocument from the file {@code dir + id + ".ser"}.
  *
  * @param id name of the saved document (file name without the ".ser" extension)
  * @param dir location of the saved .ser file; it is concatenated with {@code id} as-is, so it
  *     must already end with a path separator
  * @param printData if true, prints the loaded document to standard output (via toString)
  * @return the deserialized TaggedDocument, or null if the file could not be opened or read, or
  *     if a class needed for deserialization could not be found
  */
 public static TaggedDocument readTaggedDocument(String id, String dir, boolean printData) {
   String path = dir + id + ".ser";
   TaggedDocument td = null;
   try {
     // Open the file first and close it in 'finally': the ObjectInputStream constructor reads the
     // stream header and can throw, which would otherwise leave the file handle open.
     FileInputStream fileIn = new FileInputStream(path);
     try {
       ObjectInput inputObject = new ObjectInputStream(new BufferedInputStream(fileIn));
       td = (TaggedDocument) inputObject.readObject();
     } catch (ClassNotFoundException e) {
       td = null;
       Logger.logln("Couldn't load TaggedDocument: " + id + ", from: " + dir);
       e.printStackTrace();
     } finally {
       // Closing the underlying file stream releases the only system resource held by the chain.
       fileIn.close();
     }
   } catch (IOException e) {
     // FileNotFoundException is an IOException, so a missing file is reported here as well.
     Logger.logln("Couldn't read serialized file: " + path);
     e.printStackTrace();
   }
   if (printData && td != null) {
     System.out.println(td.toString());
   }
   return td;
 }
Beispiel #2
0
 /**
  * Reads a serialized HashMap of TreeData objects from the file {@code dir + id + ".ser"}.
  *
  * @param id name of the saved HashMap (file name without the ".ser" extension)
  * @param dir location of the saved .ser file; it is concatenated with {@code id} as-is, so it
  *     must already end with a path separator
  * @param printData if true, prints the loaded HashMap to standard output (via toString)
  * @return the deserialized HashMap of TreeData objects, or null if the file could not be opened
  *     or read, or if a class needed for deserialization could not be found
  */
 @SuppressWarnings("unchecked")
 public static HashMap<String, TreeData> readTreeDataMap(
     String id, String dir, boolean printData) {
   String path = dir + id + ".ser";
   HashMap<String, TreeData> tdHash = null;
   try {
     // Open the file first and close it in 'finally': the ObjectInputStream constructor reads the
     // stream header and can throw, which would otherwise leave the file handle open.
     FileInputStream fileIn = new FileInputStream(path);
     try {
       ObjectInput inputObject = new ObjectInputStream(new BufferedInputStream(fileIn));
       // Unchecked: the generic type arguments cannot be verified at run time.
       tdHash = (HashMap<String, TreeData>) inputObject.readObject();
     } catch (ClassNotFoundException e) {
       tdHash = null;
       Logger.logln("Couldn't load HashMap<String, TreeData>: " + id + ", from: " + dir);
       e.printStackTrace();
     } finally {
       // Closing the underlying file stream releases the only system resource held by the chain.
       fileIn.close();
     }
   } catch (IOException e) {
     // FileNotFoundException is an IOException, so a missing file is reported here as well.
     Logger.logln("Couldn't read serialized file: " + path);
     e.printStackTrace();
   }
   if (printData && tdHash != null) {
     System.out.println(tdHash.toString());
   }
   return tdHash;
 }
Beispiel #3
0
 /**
  * Generic object writer: serializes {@code o} to the file {@code dir + id + ".ser"}.
  *
  * <p>The target path is printed to standard output before writing.
  *
  * @param o the Object to write (may be null; a null reference is serialized as such)
  * @param id name of object (file name without the ".ser" extension)
  * @param dir directory to write the object to; it is concatenated with {@code id} as-is, so it
  *     must already end with a path separator
  * @return true if no errors, false if the file could not be opened or written
  */
 public static boolean writeObject(Object o, String id, String dir) {
   String path = dir + id + ".ser";
   System.out.println("Place to write: " + path);
   try {
     // Open the file first so it is closed even if building the object stream fails.
     FileOutputStream fileOut = new FileOutputStream(path);
     try {
       ObjectOutputStream outObject = new ObjectOutputStream(new BufferedOutputStream(fileOut));
       try {
         outObject.writeObject(o);
       } finally {
         // Flushes buffered data and closes the whole chain.
         outObject.close();
       }
     } finally {
       // Harmless if the chain was already closed above.
       fileOut.close();
     }
   } catch (IOException e) {
     // Covers FileNotFoundException too. Concatenation (rather than o.toString()) keeps this
     // error path from throwing a NullPointerException when 'o' is null.
     Logger.logln("ERROR saving object: " + o);
     e.printStackTrace();
     return false;
   }
   return true;
 }
 /**
  * Shows a yes/no dialog telling the user that a fatal error occurred and asking whether to
  * terminate. Choosing "yes" logs the request to STDERR and exits the JVM with status
  * {@code FATAL_ERROR}; any other outcome returns without exiting.
  */
 public static void fatalError() {
   String prompt =
       "Anonymouth has encountered a fatal error.\n"
           + "The best thing to do is to examine the stack trace, and terminate the program.\n"
           + "If you would like to save your work/problem set prior to closing Anonymouth,\n"
           + "click \"no\" below. Otherwise, click \"yes\" (this will close Anonymouth)";
   int choice =
       JOptionPane.showConfirmDialog(
           null, prompt, "Fatal error encountered. Terminate?", JOptionPane.YES_NO_OPTION);

   // Only an explicit "yes" terminates the program.
   if (choice != JOptionPane.YES_OPTION) {
     return;
   }
   Logger.logln("Fatal error encountered, termination requested.", LogOut.STDERR);
   System.exit(FATAL_ERROR);
 }
  /**
   * Loads {@code dirtyDoc} and returns a new Document holding the same text with every character
   * in the Unicode "Other" category ({@code \p{C}}), except tab, line feed and carriage return,
   * replaced by a single space. Author and title are copied from {@code dirtyDoc}.
   *
   * <p>As a side effect a file named after the document's title is created (or truncated) in
   * {@code ./temp/}.
   *
   * @param dirtyDoc the document to clean
   * @return the cleaned copy, or {@code dirtyDoc} itself if anything fails while loading,
   *     cleaning or creating the temp file
   */
  public static Document removeUnicodeControlChars(Document dirtyDoc) {
    Document cleanDoc = new Document();
    try {
      dirtyDoc.load();
      // The '&&' intersection operator is only recognized inside a character class, so the
      // whole expression has to be wrapped in [...]; otherwise "&&" is matched literally.
      String cleaned = dirtyDoc.stringify().replaceAll("[\\p{C}&&[^\\t\\n\\r]]", " ");
      cleanDoc.setText(cleaned.toCharArray());
      cleanDoc.setAuthor(dirtyDoc.getAuthor());
      cleanDoc.setTitle(dirtyDoc.getTitle());
      // NOTE(review): nothing is ever written to this file; it is only created/truncated.
      // The creation is kept in case callers rely on the file existing, but the writer is now
      // closed right away instead of being leaked. Confirm whether it can be removed.
      FileWriter fw = new FileWriter(new File("./temp/" + dirtyDoc.getTitle()));
      fw.close();
      return cleanDoc;
    } catch (Exception e) {
      e.printStackTrace();
      Logger.logln(
          "ERROR! Could not load document: "
              + dirtyDoc.getTitle()
              + " (SentenceTools.removeUnicodeControlChars)");
      return dirtyDoc;
    }
  }
  /**
   * Takes a text (one String representing an entire document), and breaks it up into sentences.
   * Tries to find true ends of sentences: shouldn't break up sentences containing quoted sentences,
   * checks for sentences ending in a quoted sentence (e.g. He said, "Hello." ), and will not break
   * at the periods of the entries listed in {@code notEndsOfSentence} (presumably common
   * abbreviations such as Dr., Mr., U.S., etc., i.e. - confirm against that array). However, it is
   * probably not perfect.
   *
   * <p>Before splitting, curly double quotes are replaced by straight ones and characters in the
   * Unicode "Other" category (except tab, line feed and carriage return) are replaced by spaces.
   * Text after the last match of {@code EOS_chars} is not returned.
   *
   * @param text the document text; may be null or empty
   * @return the sentences in document order; an empty list if {@code text} is null or empty
   */
  public ArrayList<String> makeSentenceTokens(String text) {
    ArrayList<String> sents = new ArrayList<String>(MAX_SENTENCES);
    // Matcher.find(1) below throws IndexOutOfBoundsException on an empty input, so bail out early.
    if (text == null || text.isEmpty()) {
      return sents;
    }
    boolean merge1 = false, mergeFinal = false;
    // currentStart is kept one past the index where the current sentence begins: substrings are
    // taken from (currentStart - 1).
    int currentStart = 1;
    int currentStop = 0;
    String temp;
    text = text.replaceAll("\u201C", "\"");
    text = text.replaceAll("\u201D", "\"");
    // The '&&' intersection operator is only recognized inside a character class, so the whole
    // expression has to be wrapped in [...]; otherwise "&&" is matched literally.
    text = text.replaceAll("[\\p{C}&&[^\\t\\n\\r]]", " ");
    int notEOSNumber = 0;
    int numNotEOS = notEndsOfSentence.length;
    String replacementString = "";
    String safeString = "";

    // Mask the periods inside every notEndsOfSentence entry so they are not mistaken for a
    // sentence end; they are restored just before a sentence is stored.
    for (notEOSNumber = 0; notEOSNumber < numNotEOS; notEOSNumber++) {
      replacementString = notEndsOfSentence[notEOSNumber].replaceAll("\\.", PERIOD_REPLACEMENT);
      safeString = notEndsOfSentence[notEOSNumber].replaceAll("\\.", "\\\\.");
      text = text.replaceAll("\\b(?i)" + safeString, replacementString);
    }
    // Measure the text only after all rewriting above: the replacements can change its length,
    // and this value is compared against indexes into the rewritten text.
    int lenText = text.length();
    Matcher sent = EOS_chars.matcher(text);
    boolean foundEOS = sent.find(currentStart);
    Matcher sentEnd;
    int charNum = 0;
    int lenString = 0;
    int lastQuoteAt = 0;
    boolean foundQuote = false;
    boolean isSentence;
    while (foundEOS) {
      currentStop = sent.end();
      temp = text.substring(currentStart - 1, currentStop);
      lenString = temp.length();
      lastQuoteAt = 0;
      foundQuote = false;
      // Toggle on every straight quote: foundQuote ends up true when the candidate contains an
      // odd number of quotes; lastQuoteAt is the index of the last one.
      for (charNum = 0; charNum < lenString; charNum++) {
        if (temp.charAt(charNum) == '\"') {
          lastQuoteAt = charNum;
          foundQuote = !foundQuote;
        }
      }
      // Unbalanced quote: extend the candidate to just past the next quote in the text, or to the
      // end of the text if there is none.
      if (foundQuote && temp.indexOf("\"", lastQuoteAt + 1) == -1) {
        if ((currentStop = text.indexOf("\"", currentStart + lastQuoteAt + 1)) == -1) {
          currentStop = text.length();
        } else {
          currentStop += 1;
          merge1 = true;
        }
      }
      safeString = text.substring(currentStart - 1, currentStop);

      sentEnd = sentence_quote.matcher(safeString);
      isSentence = sentEnd.find();
      if (isSentence) {
        // If it seems that the text looks like this: He said, "Hello." Then she said, "Hi."
        // then we want to split this up into two sentences (it's possible to have a sentence like
        // this: He said, "Hello.")
        currentStop = text.indexOf("\"", sentEnd.start() + currentStart) + 1;
        safeString = text.substring(currentStart - 1, currentStop);
      }

      // Restore the periods that were masked before matching.
      safeString = safeString.replaceAll(PERIOD_REPLACEMENT, ".");
      if (mergeFinal) {
        // The previous pass requested a merge: glue this piece onto the previously stored one.
        mergeFinal = false;
        String prev = sents.remove(sents.size() - 1);
        safeString = prev + safeString;
      }
      if (merge1) { // makes so that the merge happens on the next pass through
        merge1 = false;
        mergeFinal = true;
      }

      sents.add(safeString);
      // NOTE(review): this terminates the whole JVM (with status 0) from inside a tokenizer.
      if (currentStart < 0 || currentStop < 0) {
        Logger.logln("Something went really wrong making sentence tokens.");
        System.exit(0);
      }
      currentStart = currentStop + 1;
      if (currentStart >= lenText) {
        foundEOS = false;
        continue;
      }
      foundEOS = sent.find(currentStart);
    }

    return sents;
  }