/* public methods */
  /**
   * Annotates the nodes of {@code result} with part-of-speech information read from {@code file}.
   *
   * <p>The method first calls {@code addPOS()}, then reads the whole file, splits it on
   * whitespace, punctuation, underscores and several Unicode dashes, and consumes the tokens as
   * (word, tag) pairs. Words are lower-cased before lookup. For every word that
   * {@code result.findIndex} locates in the graph, the existing node's word receives the numeric
   * part-of-speech code for the tag. A {@code 's} token is treated as the possessive of the
   * previous word: the node for {@code previousWord + "'s"} (if present) receives the previous
   * word's part of speech and is marked possessive.
   *
   * <p>I/O failures are reported on standard output and do not abort the method; whatever was
   * read before the failure is still processed.
   *
   * @return the graph held in {@code result}, after matching nodes have been updated
   */
  public DirectedGraph parse() {
    /* add the parts of speech */
    addPOS();
    /* update all nodes to include parts of speech */
    System.out.println("Parsing file for parts of speech analysis...");

    /* create a tokenizer to parse the data */
    StringTokenizer st =
        new StringTokenizer(
            readTaggedData(),
            " :;\"\n\t\r_,.!?`\u2015\u2012\u2014\u2013\u2212"); // unicode for dash

    /* the most recent non-possessive word; the possessive branch builds on it */
    Word word = new Word("");
    while (st.hasMoreTokens()) {
      /* take care of extraneous hyphens */
      String wordString = nextWordToken(st);
      if (wordString == null) {
        /* only stray hyphens were left in the input */
        break;
      }

      /* put the string to lowercase */
      wordString = wordString.toLowerCase();

      /*
       * get the POS; a word without a following tag gets an empty tag instead of silently
       * inheriting the tag of the previous word
       */
      String pos = st.hasMoreTokens() ? st.nextToken() : "";

      Node node;
      /* if we have the possessive case */
      if (wordString.equals("'s")) {
        /* create a node object from the previous iteration */
        node = new Node(new Word(word.toString() + "'s"));
        /* get the position of the node in the graph */
        int index = result.findIndex(node);
        if (index >= 0) {
          /* make sure we get all the associations */
          node = result.nodeAt(index);
          /* transfer the part of speech */
          node.getWord().setPartOfSpeech(word.getPartOfSpeech());
          /* add the possessive quality */
          node.getWord().setPossessive();
          /* insert the node into our array at the right position */
          (result.getNodes())[index] = node;
        }
      } else {
        word = new Word(wordString);
        /* unknown tags leave the word's part of speech untouched */
        int code = partOfSpeechCode(pos);
        if (code > 0) {
          word.setPartOfSpeech(code);
        }

        node = new Node(word);

        int index = result.findIndex(node);
        if (index >= 0) {
          /* make sure we get all the associations */
          node = result.nodeAt(index);
          /* transfer the part of speech */
          node.getWord().setPartOfSpeech(word.getPartOfSpeech());
          /* insert the node into our array at the right position */
          (result.getNodes())[index] = node;
        }
      }
    } // end while
    return result;
  }

  /* private helpers for parse() */

  /**
   * Tags recognised by {@link #partOfSpeechCode(String)}; the numeric code passed to
   * {@code setPartOfSpeech} is the tag's position in this array plus one.
   */
  private static final String[] PART_OF_SPEECH_TAGS = {
    "AFX", "CC", "CD", "DT", "EX", "FW", "IN", "JJ", "JJR", "JJS",
    "LS", "MD", "NN", "NNP", "NNPS", "NNS", "PDT", "POS", "PRP", "PRP$",
    "RB", "RBR", "RBS", "RP", "SYM", "TO", "UH", "VB", "VBD", "VBG",
    "VBN", "VBP", "VBZ", "WDT", "WP", "WPS", "WRB"
  };

  /**
   * Reads the complete contents of {@code file} and decodes them with the platform default
   * charset.
   *
   * <p>The stream is read until end of file, so the result is neither truncated at a fixed size
   * nor padded with NUL characters, and the stream is closed even when reading fails. Errors are
   * reported on standard output; the data read up to that point (possibly empty) is returned.
   */
  private String readTaggedData() {
    java.io.ByteArrayOutputStream contents = new java.io.ByteArrayOutputStream();
    try {
      /* retrieve the data from the file */
      FileInputStream fin = new FileInputStream(file);
      try {
        byte[] chunk = new byte[8192];
        int count;
        /* a single read() may return fewer bytes than requested, so loop until EOF */
        while ((count = fin.read(chunk)) != -1) {
          contents.write(chunk, 0, count);
        }
      } finally {
        fin.close();
      }
    } catch (Exception e) {
      System.out.println("IO Error: " + e);
    }
    /* transfer data to a string */
    return contents.toString();
  }

  /**
   * Returns the next token that is not a bare {@code -} or {@code --}, or {@code null} when the
   * tokenizer runs out of tokens before such a token is found.
   */
  private static String nextWordToken(StringTokenizer st) {
    while (st.hasMoreTokens()) {
      String token = st.nextToken();
      if (!token.equals("-") && !token.equals("--")) {
        return token;
      }
    }
    return null;
  }

  /**
   * Maps a part-of-speech tag to its numeric code (1 for {@code AFX} through 37 for
   * {@code WRB}), or returns 0 when the tag is not recognised.
   */
  private static int partOfSpeechCode(String tag) {
    /*
     * "WP$" is the Penn Treebank spelling of the possessive wh-pronoun tag; the historical
     * "WPS" spelling in the table is kept for backward compatibility and both map to 36.
     */
    if (tag.equals("WP$")) {
      return 36;
    }
    for (int i = 0; i < PART_OF_SPEECH_TAGS.length; i++) {
      if (PART_OF_SPEECH_TAGS[i].equals(tag)) {
        return i + 1;
      }
    }
    return 0;
  }