Blame view
egs/voxforge/gst_demo/run-live.py
4.56 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
#!/usr/bin/env python
#
# Copyright (c) 2013 Tanel Alumae
#
# Slightly inspired by the CMU Sphinx's Pocketsphinx Gstreamer plugin demo (which has BSD license)
#
# Apache 2.0

from __future__ import print_function

import sys
import os
import gi
gi.require_version('Gst', '1.0')
# The GUI code below relies on GTK 3 APIs (Gtk.VBox, Gtk.main), so pin the
# namespace versions instead of taking whatever happens to be the default.
gi.require_version('Gtk', '3.0')
gi.require_version('Gdk', '3.0')
from gi.repository import GObject, Gst, Gtk, Gdk
GObject.threads_init()
Gdk.threads_init()

Gst.init(None)


class DemoApp(object):
    """GStreamer/Kaldi Demo Application"""

    def __init__(self):
        """Initialize a DemoApp object"""
        self.init_gui()
        self.init_gst()

    def init_gui(self):
        """Initialize the GUI components"""
        self.window = Gtk.Window()
        self.window.connect("destroy", self.quit)
        self.window.set_default_size(400, 200)
        self.window.set_border_width(10)
        vbox = Gtk.VBox()
        self.text = Gtk.TextView()
        self.textbuf = self.text.get_buffer()
        self.text.set_wrap_mode(Gtk.WrapMode.WORD)
        vbox.pack_start(self.text, True, True, 1)
        # Pass the label by keyword: positional constructor arguments are
        # deprecated in PyGObject.
        self.button = Gtk.Button(label="Speak")
        self.button.connect('clicked', self.button_clicked)
        vbox.pack_start(self.button, False, False, 5)
        self.window.add(vbox)
        self.window.show_all()

    def quit(self, window):
        """Handle the window's "destroy" signal: stop audio and leave the main loop."""
        # The pipeline only exists once init_gst() has completed.
        pipeline = getattr(self, "pipeline", None)
        if pipeline is not None:
            pipeline.set_state(Gst.State.NULL)
        Gtk.main_quit()

    def init_gst(self):
        """Initialize the speech components

        Builds the pipeline
        pulsesrc -> audioconvert -> audioresample -> asr -> fakesink
        and starts it with the decoder silenced. Prints a diagnostic to
        stderr and exits with status 1 if the pulsesrc or decoder element
        cannot be created, or if the model directory is missing.
        """
        self.pulsesrc = Gst.ElementFactory.make("pulsesrc", "pulsesrc")
        if self.pulsesrc is None:
            print("Error loading pulsesrc GST plugin. You probably need the gstreamer1.0-pulseaudio package", file=sys.stderr)
            sys.exit(1)
        self.audioconvert = Gst.ElementFactory.make("audioconvert", "audioconvert")
        self.audioresample = Gst.ElementFactory.make("audioresample", "audioresample")
        self.asr = Gst.ElementFactory.make("onlinegmmdecodefaster", "asr")
        self.fakesink = Gst.ElementFactory.make("fakesink", "fakesink")

        if self.asr is None:
            print("Couldn't create the onlinegmmdecodefaster element. ", file=sys.stderr)
            if "GST_PLUGIN_PATH" in os.environ:
                print("Have you compiled the Kaldi GStreamer plugin?", file=sys.stderr)
            else:
                print("You probably need to set the GST_PLUGIN_PATH environment variable", file=sys.stderr)
                print("Try running: GST_PLUGIN_PATH=../../../src/gst-plugin %s" % sys.argv[0], file=sys.stderr)
            sys.exit(1)

        model_dir = "online-data/models/tri2b_mmi/"
        if not os.path.isdir(model_dir):
            print("Model (%s) not downloaded. Run run-simulated.sh first" % model_dir, file=sys.stderr)
            sys.exit(1)
        self.asr.set_property("fst", model_dir + "HCLG.fst")
        self.asr.set_property("lda-mat", model_dir + "matrix")
        self.asr.set_property("model", model_dir + "model")
        self.asr.set_property("word-syms", model_dir + "words.txt")
        self.asr.set_property("silence-phones", "1:2:3:4:5")
        self.asr.set_property("max-active", 4000)
        self.asr.set_property("beam", 12.0)
        self.asr.set_property("acoustic-scale", 0.0769)

        # initially silence the decoder
        self.asr.set_property("silent", True)

        self.pipeline = Gst.Pipeline()
        for element in [self.pulsesrc, self.audioconvert, self.audioresample, self.asr, self.fakesink]:
            self.pipeline.add(element)
        self.pulsesrc.link(self.audioconvert)
        self.audioconvert.link(self.audioresample)
        self.audioresample.link(self.asr)
        self.asr.link(self.fakesink)

        self.asr.connect('hyp-word', self._on_word)
        self.pipeline.set_state(Gst.State.PLAYING)

    def _on_word(self, asr, word):
        """Handle the decoder's 'hyp-word' signal by appending text to the view.

        The "<#s>" token is rendered as a blank instead of being shown
        literally; any other word is inserted as-is. A separating space
        follows in both cases.
        """
        Gdk.threads_enter()
        try:
            # NOTE(review): indentation was reconstructed; the trailing
            # separator is assumed to follow both branches - confirm.
            if word == "<#s>":
                self.textbuf.insert_at_cursor(" ")
            else:
                self.textbuf.insert_at_cursor(word)
            self.textbuf.insert_at_cursor(" ")
        finally:
            # Always release the GDK lock, even if the insert fails.
            Gdk.threads_leave()

    def button_clicked(self, button):
        """Handle button presses."""
        # The button label doubles as the state: "Speak" means the decoder
        # is currently silenced.
        if button.get_label() == "Speak":
            button.set_label("Stop")
            self.asr.set_property("silent", False)
        else:
            button.set_label("Speak")
            self.asr.set_property("silent", True)


if __name__ == '__main__':
    app = DemoApp()
    print('''
  The (bigram) language model used to build the decoding graph was
  estimated on an audio book's text. The text in question is
  "King Solomon's Mines" (http://www.gutenberg.org/ebooks/2166).
  You may want to read some sentences from this book first ...''')

    Gtk.main()