/**
 * Gathers the recognizer's current results into a sorted set.
 *
 * <p>Reads results starting at index 0 and stops at the recognizer's result count or at
 * {@code RESULT_LIMIT}, whichever is smaller. Each result's confidence string is parsed as an
 * integer, so a non-numeric value raises {@link NumberFormatException}. The returned
 * {@link TreeSet} is ordered by {@code GrammarResult}'s natural ordering.
 *
 * @return the collected results; empty when the recognizer reports none
 */
private TreeSet<GrammarResult> getRecognitionResults() {
    final TreeSet<GrammarResult> sorted = new TreeSet<GrammarResult>();

    int position = 0;
    while (position < mSrec.getResultCount() && position < RESULT_LIMIT) {
        final String confidenceText = mSrec.getResult(position, Recognizer.KEY_CONFIDENCE);
        final int confidence = Integer.parseInt(confidenceText);
        final String literal = mSrec.getResult(position, Recognizer.KEY_LITERAL);
        final String meaning = mSrec.getResult(position, Recognizer.KEY_MEANING);

        sorted.add(new GrammarResult(meaning, literal, confidence));
        position++;
    }

    return sorted;
}
/** * Loads a compiled grammar from a file. * * @param grammar A file representing the compiled G2G-formatted grammar. * @return <code>true</code> on success */ public boolean loadGrammar(File grammar) { boolean successful = false; // Check directory contents... String[] contents = grammar.getParentFile().list(); for (String content : contents) { Log.e(TAG, content); } mGrammar = grammar; String path = mGrammar.getPath(); try { mSrec = new Recognizer(SREC_DIR + "/baseline11k.par"); mSrecGrammar = mSrec.new Grammar(path); mSrecGrammar.setupRecognizer(); mSrecGrammar.resetAllSlots(); successful = true; } catch (IllegalStateException e) { Log.e(TAG, e.toString()); String[] contents2 = grammar.getParentFile().list(); for (String content : contents2) { Log.e(TAG, "+" + content); } e.printStackTrace(); } catch (IOException e) { Log.e(TAG, e.toString()); e.printStackTrace(); } if (!successful) { if (mSrecGrammar != null) { mSrecGrammar.destroy(); } if (mSrec != null) { mSrec.destroy(); } mSrecGrammar = null; mGrammar = null; mSrec = null; } return successful; }
/** * Voice command recognition class. Takes a list of input commands and returns recognized commands. * * @author [email protected] (Alan Viverette) */ public class GrammarRecognizer { private static final String TAG = "GrammarRecognizer"; private static final String EXTENSION_G2G = "g2g"; private static final String SREC_DIR = Recognizer.getConfigDir(null); private static final int SAMPLE_RATE = 11025; private static final int RESULT_LIMIT = 5; // Initialization constants public static final int SUCCESS = 0; public static final int ERROR = -1; public static final String KEY_CONFIDENCE = Recognizer.KEY_CONFIDENCE; public static final String KEY_LITERAL = Recognizer.KEY_LITERAL; public static final String KEY_MEANING = Recognizer.KEY_MEANING; private WeakReference<Context> mContext; private File mGrammar; private Recognizer mSrec; private Recognizer.Grammar mSrecGrammar; private GrammarListener mListener; private Thread mThread; private boolean mAlive; /** * Creates a new GrammarRecognizer for the given Context. * * @param context The context that will own this object. */ public GrammarRecognizer(Context context) { mContext = new WeakReference<Context>(context); mAlive = false; } /** * Interrupts the recognition thread. You may call still recognize() after calling this method. * * @return <code>true</code> on success */ public boolean stop() { mAlive = false; if (mThread != null) { mThread.interrupt(); try { mThread.join(); } catch (InterruptedException e) { e.printStackTrace(); } mThread = null; return true; } return false; } public boolean isListening() { return mAlive; } /** * Stops the recognition thread and destroys the recognizer. You may not call any methods on the * object after calling this method. */ public void shutdown() { stop(); mSrecGrammar.destroy(); mSrec.destroy(); } /** * Loads a compiled grammar from a resource ID. * * @param resId The resource ID of the G2G-formatted grammar. 
* @return <code>true</code> on success */ public boolean loadGrammar(int resId) { File grammar = unpackGrammar(resId); return loadGrammar(grammar); } /** * Loads a compiled grammar from a file. * * @param grammar A file representing the compiled G2G-formatted grammar. * @return <code>true</code> on success */ public boolean loadGrammar(File grammar) { boolean successful = false; // Check directory contents... String[] contents = grammar.getParentFile().list(); for (String content : contents) { Log.e(TAG, content); } mGrammar = grammar; String path = mGrammar.getPath(); try { mSrec = new Recognizer(SREC_DIR + "/baseline11k.par"); mSrecGrammar = mSrec.new Grammar(path); mSrecGrammar.setupRecognizer(); mSrecGrammar.resetAllSlots(); successful = true; } catch (IllegalStateException e) { Log.e(TAG, e.toString()); String[] contents2 = grammar.getParentFile().list(); for (String content : contents2) { Log.e(TAG, "+" + content); } e.printStackTrace(); } catch (IOException e) { Log.e(TAG, e.toString()); e.printStackTrace(); } if (!successful) { if (mSrecGrammar != null) { mSrecGrammar.destroy(); } if (mSrec != null) { mSrec.destroy(); } mSrecGrammar = null; mGrammar = null; mSrec = null; } return successful; } /** * Loads the available slots in a grammar using the entries in the supplied GrammarMap. You must * load a grammar before calling this method. * * @param map A populated GrammarMap object. * @return <code>true</code> on success */ public boolean loadSlots(GrammarMap map) { Context context = mContext.get(); Resources res = context.getResources(); File cached = context.getFileStreamPath("compiled." 
+ EXTENSION_G2G); cached.deleteOnExit(); for (Entry<String, List<GrammarEntry>> pair : map.getEntries()) { String slot = pair.getKey(); List<GrammarEntry> entries = pair.getValue(); for (GrammarEntry entry : entries) { mSrecGrammar.addWordToSlot(slot, entry.word, entry.pron, entry.weight, entry.tag); } } // Attempt to compile and save grammar to cache try { mSrecGrammar.compile(); cached.getParentFile().mkdirs(); mSrecGrammar.save(cached.getPath()); Log.i(TAG, "Compiled grammar to " + cached.getAbsolutePath()); } catch (IOException e) { e.printStackTrace(); return false; } // Destroy the old grammar mSrecGrammar.destroy(); mSrecGrammar = null; // Attempt to load new grammar from cached copy try { mGrammar = cached; mSrecGrammar = mSrec.new Grammar(cached.getPath()); mSrecGrammar.setupRecognizer(); } catch (IOException e) { e.printStackTrace(); return false; } return true; } /** * Sets this grammar's listener, which will receive recognition results and status updates. * * @param listener The listener to set. */ public void setListener(GrammarListener listener) { mListener = listener; } public void recognize() { stop(); mThread = new Thread() { @Override public void run() { recognizeThread(); } }; mThread.start(); } /** * Attempts to unpack a grammar resource into cache. The unpacked copy will be deleted when the * application exits normally. * * @param resId The resource ID of the raw grammar resource. * @return the file representing by the unpacked grammar */ private File unpackGrammar(int resId) { Context context = mContext.get(); String extension = EXTENSION_G2G; Resources res = context.getResources(); String name = res.getResourceEntryName(resId); File grammar = context.getFileStreamPath(name + "." 
+ extension); grammar.deleteOnExit(); try { grammar.getParentFile().mkdirs(); InputStream in = res.openRawResource(resId); FileOutputStream out = new FileOutputStream(grammar); byte[] buffer = new byte[1024]; int count = 0; while ((count = in.read(buffer)) > 0) { out.write(buffer, 0, count); } Log.i(TAG, "Unpacked base grammar to " + grammar.getPath()); return grammar; } catch (IOException e) { e.printStackTrace(); Log.e(TAG, e.toString()); return null; } } /** Runs the recognition loop. */ private void recognizeThread() { if (mAlive) { return; } mAlive = true; if (Config.DEBUG) { Log.e(TAG, "Started recognition thread"); } InputStream mic = null; boolean recognizerStarted = false; // Possible results... String error = null; TreeSet<GrammarResult> results = null; boolean failure = false; try { mic = new MicrophoneInputStream(SAMPLE_RATE, SAMPLE_RATE * 15); mSrec.start(); recognizerStarted = true; while (mAlive) { if (Thread.interrupted()) { throw new InterruptedException(); } int event = mSrec.advance(); if (Config.DEBUG && event != Recognizer.EVENT_INCOMPLETE && event != Recognizer.EVENT_NEED_MORE_AUDIO) { Log.e(TAG, "event=" + event); } switch (event) { case Recognizer.EVENT_INCOMPLETE: case Recognizer.EVENT_STARTED: case Recognizer.EVENT_START_OF_VOICING: case Recognizer.EVENT_END_OF_VOICING: break; case Recognizer.EVENT_START_OF_UTTERANCE_TIMEOUT: if (Config.DEBUG) { Log.e(TAG, "Didn't hear anything, restarting"); } mSrec.stop(); mSrec.start(); break; case Recognizer.EVENT_NO_MATCH: if (Config.DEBUG) { Log.e(TAG, "No match, return not recognized"); } mAlive = false; failure = true; break; case Recognizer.EVENT_RECOGNITION_RESULT: if (Config.DEBUG) { Log.e(TAG, "Recognized something, return it"); } mAlive = false; results = getRecognitionResults(); break; case Recognizer.EVENT_NEED_MORE_AUDIO: mSrec.putAudio(mic); break; default: if (Config.DEBUG) { Log.e(TAG, "Received invalid event=" + event); } mAlive = false; error = Recognizer.eventToString(event); break; } 
} } catch (IOException e) { e.printStackTrace(); mListener.onRecognitionError(e.toString()); } catch (InterruptedException e) { // do nothing } finally { if (mSrec != null && recognizerStarted) { mSrec.stop(); } try { if (mic != null) { mic.close(); } } catch (IOException ex) { ex.printStackTrace(); } finally { mic = null; } // Wait until the very end to send results, otherwise we might // overlap and crash the AudioFlinger permanently! if (mListener != null) { if (error != null) { mListener.onRecognitionError(error); } else if (results != null) { mListener.onRecognitionSuccess(results); } else if (failure) { mListener.onRecognitionFailure(); } } } } /** Creates a list of GrammarResult objects and returns them. */ private TreeSet<GrammarResult> getRecognitionResults() { TreeSet<GrammarResult> results = new TreeSet<GrammarResult>(); for (int index = 0; index < mSrec.getResultCount() && index < RESULT_LIMIT; index++) { int confidence = Integer.parseInt(mSrec.getResult(index, Recognizer.KEY_CONFIDENCE)); String literal = mSrec.getResult(index, Recognizer.KEY_LITERAL); String meaning = mSrec.getResult(index, Recognizer.KEY_MEANING); GrammarResult result = new GrammarResult(meaning, literal, confidence); results.add(result); } return results; } /** * Handles recognition results from the voice recognition engine. * * @author [email protected] (Alan Viverette) */ public static interface GrammarListener { /** * Called when recognition is complete. Returns results sorted by highest confidence first. * * @param results a set of results sorted by highest confidence first */ public void onRecognitionSuccess(TreeSet<GrammarResult> results); /** * Called when recognition fails to return any results. You may resume recognition immediately * after receiving this callback. */ public void onRecognitionFailure(); /** * Called when something has gone wrong with recognition. Possible reasons include an unknown * event or an exception. 
You should not resume recognition after this callback. * * @param reason a String explaining why recognition failed */ public void onRecognitionError(String reason); } /** * A wrapper for GrammarRecognizer recognition results. * * @author [email protected] (Alan Viverette) */ public static class GrammarResult implements Comparable<GrammarResult> { private final String meaning; private final String literal; private final int confidence; /** * Constructs a new GrammarResult with the given meaning, literal, and confidence. * * @param meaning the constructed meaning of the literal result * @param literal exactly what the speech recognition engine heard * @param confidence integer representing the recognition confidence */ public GrammarResult(String meaning, String literal, int confidence) { this.meaning = meaning; this.literal = literal; this.confidence = confidence; } /** * Returns the literal recognition result, which is the grammar-based result heard by the * recognizer. * * @return A String representing what the recognizer heard. */ public String getLiteral() { return literal; } /** * Returns the meaning of the recognized literal according to the rules defined in the grammar. * * @return A String that represents the meaning of the literal result. */ public String getMeaning() { return meaning; } /** * Returns the confidence score of the recognition result. Scores are relative to available * grammar paths and other returned results. * * @return An integer value representing the confidence score of the recognition result. */ public int getConfidence() { return confidence; } /** * Returns the difference in confidence between this and another GrammarResult. * * @param other The other GrammarResult. * @return The difference in confidence between the results. */ @Override public int compareTo(GrammarResult other) { return other.confidence - confidence; } } /** * Contains a map of slot names to possible words. 
* * @author [email protected] (Alan Viverette) */ public static class GrammarMap { Map<String, List<GrammarEntry>> slotMap; public GrammarMap() { slotMap = new HashMap<String, List<GrammarEntry>>(); } public Set<String> getSlots() { return slotMap.keySet(); } public Set<Entry<String, List<GrammarEntry>>> getEntries() { return slotMap.entrySet(); } public List<GrammarEntry> getSlot(String slot) { return slotMap.get(slot); } /** * Assigns a new word to the specified slot. If either word or slot is <code>null</code>, then * no-op. * * @param slot the slot to assign the word to, ex. "Names" (not "@Names") * @param word the word to listen for, ex. "Alan Viverette" * @param pron (optional) word pronunciation in SREC format * @param weight (optional) word weight relative to other slot words * @param tag the meaning assigned to this word */ public void addWord(String slot, String word, String pron, int weight, String tag) { if (slot == null || word == null || tag == null) { return; } slot = "@" + slot; tag = "V='" + tag + "'"; List<GrammarEntry> entries = slotMap.get(slot); if (entries == null) { entries = new LinkedList<GrammarEntry>(); slotMap.put(slot, entries); } GrammarEntry entry = new GrammarEntry(); entry.word = word; entry.pron = pron; entry.weight = weight; entry.tag = tag; entries.add(entry); } /** * Assigns each word in the array to the specified slot. If the slot is null, then no-op. * * @param slot the slot to assign the word to, ex. "Names" (not "@Names") * @param words array of words to listen for, ex. "Alan Viverette" * @param prons (optional) array of word pronunciations in SREC format * @param weights (optional) array of word weights relative to other slot word weights; use null * for default * @param tag the meaning assigned to this word */ public void addWords(String slot, String[] words, String[] prons, int[] weights, String tag) { for (int i = 0; i < words.length; i++) { String word = words[i]; String pron = prons == null ? 
null : prons[i]; int weight = weights == null ? 1 : weights[i]; addWord(slot, word, pron, weight, tag); } } } /** * Represents a single entry in the grammar map. * * @author [email protected] (Alan Viverette) */ private static class GrammarEntry { protected String word; protected String pron; protected int weight; protected String tag; } }
/** Runs the recognition loop. */ private void recognizeThread() { if (mAlive) { return; } mAlive = true; if (Config.DEBUG) { Log.e(TAG, "Started recognition thread"); } InputStream mic = null; boolean recognizerStarted = false; // Possible results... String error = null; TreeSet<GrammarResult> results = null; boolean failure = false; try { mic = new MicrophoneInputStream(SAMPLE_RATE, SAMPLE_RATE * 15); mSrec.start(); recognizerStarted = true; while (mAlive) { if (Thread.interrupted()) { throw new InterruptedException(); } int event = mSrec.advance(); if (Config.DEBUG && event != Recognizer.EVENT_INCOMPLETE && event != Recognizer.EVENT_NEED_MORE_AUDIO) { Log.e(TAG, "event=" + event); } switch (event) { case Recognizer.EVENT_INCOMPLETE: case Recognizer.EVENT_STARTED: case Recognizer.EVENT_START_OF_VOICING: case Recognizer.EVENT_END_OF_VOICING: break; case Recognizer.EVENT_START_OF_UTTERANCE_TIMEOUT: if (Config.DEBUG) { Log.e(TAG, "Didn't hear anything, restarting"); } mSrec.stop(); mSrec.start(); break; case Recognizer.EVENT_NO_MATCH: if (Config.DEBUG) { Log.e(TAG, "No match, return not recognized"); } mAlive = false; failure = true; break; case Recognizer.EVENT_RECOGNITION_RESULT: if (Config.DEBUG) { Log.e(TAG, "Recognized something, return it"); } mAlive = false; results = getRecognitionResults(); break; case Recognizer.EVENT_NEED_MORE_AUDIO: mSrec.putAudio(mic); break; default: if (Config.DEBUG) { Log.e(TAG, "Received invalid event=" + event); } mAlive = false; error = Recognizer.eventToString(event); break; } } } catch (IOException e) { e.printStackTrace(); mListener.onRecognitionError(e.toString()); } catch (InterruptedException e) { // do nothing } finally { if (mSrec != null && recognizerStarted) { mSrec.stop(); } try { if (mic != null) { mic.close(); } } catch (IOException ex) { ex.printStackTrace(); } finally { mic = null; } // Wait until the very end to send results, otherwise we might // overlap and crash the AudioFlinger permanently! 
if (mListener != null) { if (error != null) { mListener.onRecognitionError(error); } else if (results != null) { mListener.onRecognitionSuccess(results); } else if (failure) { mListener.onRecognitionFailure(); } } } }
/**
 * Stops the recognition thread and destroys the recognizer. You may not call any methods on the
 * object after calling this method.
 *
 * <p>Safe to call when no grammar was ever loaded or loading failed (both fields are then
 * <code>null</code>), and safe to call more than once: each native object is destroyed at most
 * once and its field is cleared afterwards.
 */
public void shutdown() {
    stop();

    if (mSrecGrammar != null) {
        mSrecGrammar.destroy();
        mSrecGrammar = null;
    }

    if (mSrec != null) {
        mSrec.destroy();
        mSrec = null;
    }
}