/**
   * Builds a matrix of adjacent-token (bigram) counts for the contents of a file.
   *
   * <p>The file is read as characters using {@code charset} and tokenized with
   * {@code tokenizerFactory}. Each token consisting only of letters (per
   * {@code Strings.allLetters}) is mapped to an integer id through
   * {@code symbolTable.getOrAddSymbol}; every other token is marked with {@code -1}. A pair of
   * neighbouring tokens is counted only when both have a non-negative id, so a non-letter token
   * is not skipped over: it simply prevents the pairs it participates in from being counted.
   *
   * <p>The matrix dimension is {@code symbolTable.numSymbols()} after tokenization, so it is
   * sized by the table as a whole rather than only by the symbols seen in this file. Progress
   * figures are printed to {@code System.out}.
   *
   * @param file the file to read
   * @param symbolTable table used to look up (and presumably register) token ids; its
   *     {@code numSymbols()} determines the matrix dimension
   * @param tokenizerFactory factory producing the tokenizer applied to the file's characters
   * @param charset name of the character encoding used to read {@code file}
   * @return a square matrix where {@code values[a][b]} is the number of times the token with id
   *     {@code a} was immediately followed by the token with id {@code b}
   * @throws Exception if reading or tokenizing the file fails
   */
  public static double[][] extractBigrams(
      File file, MapSymbolTable symbolTable, TokenizerFactory tokenizerFactory, String charset)
      throws Exception {

    char[] cs = Files.readCharsFromFile(file, charset);
    String[] tokens = tokenizerFactory.tokenizer(cs, 0, cs.length).tokenize();
    System.out.println("    Number of tokens=" + tokens.length);

    // Translate tokens to symbol ids; -1 marks tokens that are not purely letters.
    int[] symbols = new int[tokens.length];
    for (int i = 0; i < tokens.length; ++i) {
      String token = tokens[i];
      symbols[i] = Strings.allLetters(token.toCharArray()) ? symbolTable.getOrAddSymbol(token) : -1;
    }

    int numSymbols = symbolTable.numSymbols();
    // Multiply as long: the int product wraps once numSymbols exceeds 46340.
    long numEntries = (long) numSymbols * numSymbols;
    System.out.println("    Number of distinct tokens=" + numSymbols);
    System.out.println("    #Matrix entries=" + numEntries);

    // Newly allocated double arrays are already zero-filled, so no explicit initialisation.
    double[][] values = new double[numSymbols][numSymbols];

    for (int i = 1; i < symbols.length; ++i) {
      int left = symbols[i - 1];
      int right = symbols[i];
      if (left >= 0 && right >= 0) {
        values[left][right] += 1.0;
      }
    }

    return values;
  }