7人参与 • 2025-03-08 • Python
实现思路
osk模型进行输入语音转换
txt字典导航程序路径
pyttsx3引擎进行语音打印输出
关键词=程序路径
完整代码
import os import json import queue import sounddevice as sd from vosk import model, kaldirecognizer import subprocess import time import pyttsx3 import threading # 初始化 pyttsx3 引擎 engine = pyttsx3.init() engine.setproperty('rate', 150) # 设置语速 engine.setproperty('volume', 1.0) # 设置音量 # 加载vosk模型 model_path = r"d:\daku\yuyinshibie\vosk-model-small-cn-0.22" if not os.path.exists(model_path): print(f"模型路径不存在: {model_path}") engine.say(f"模型路径不存在: {model_path}") engine.runandwait() exit(1) # 读取字典文件,格式为 "命令=程序路径" def load_app_dict(file_path): app_dict = {} if not os.path.exists(file_path): print(f"字典文件不存在: {file_path}") engine.say(f"字典文件不存在: {file_path}") engine.runandwait() return app_dict with open(file_path, 'r', encoding='utf-8') as file: for line in file: parts = line.strip().split('=') if len(parts) == 2: keys, value = parts # 处理可能存在的别名情况,例如 "微信,weixin" for key in keys.split(','): app_dict[key.strip()] = value.strip() return app_dict # 启动应用程序 def launch_application(app_name, app_dict): if app_name in app_dict: app_path = app_dict[app_name] response = f"正在启动 {app_name}..." say(response) subprocess.popen(app_path) time.sleep(2) # 等待2秒再继续监听 else: response = f"找不到与 '{app_name}' 对应的应用程序。" say(response) # 定义一个函数用于语音输出,并在说的时候暂停监听 def say(text): global stream, callback_func if stream is not none: with stream_lock: stream.callback = none # 移除回调函数以暂停监听 stream.stop() # 暂停音频流 engine.say(text) engine.runandwait() if stream is not none: with stream_lock: stream.start() # 恢复音频流 stream.callback = callback_func # 重新设置回调函数 # 初始化模型和识别器 model = model(model_path) rec = kaldirecognizer(model, 16000) q = queue.queue() last_partial_result = "" last_full_command = "" stream_lock = threading.lock() stream = none callback_func = none def callback(indata, frames, time, status): if status: print(status, file=sys.stderr) q.put(bytes(indata)) # 主程序 if __name__ == "__main__": dict_file = r"d:\daku\yuyinshibie\zidian.txt" # 字典文件路径 app_dict = load_app_dict(dict_file) try: # 提前初始化音频流 callback_func = callback stream = sd.rawinputstream(samplerate=16000, blocksize=8000, dtype='int16', channels=1, callback=callback) stream.start() say("请说:") while true: data = q.get() if rec.acceptwaveform(data): result = json.loads(rec.result()) command = result['text'].strip() if command and command != last_full_command: print(f"你说的是: {command}") say(f"你说的是: {command}") if "打开" in command: app_to_open = command.replace("打开", "").strip() launch_application(app_to_open, app_dict) last_full_command = command elif rec.partialresult(): partial_result = json.loads(rec.partialresult())['partial'] if partial_result and "打开" in partial_result and partial_result != last_partial_result: print(f"部分结果: {partial_result}") say(f"部分结果: {partial_result}") last_partial_result = partial_result except keyboardinterrupt: say("\n退出程序。") finally: if stream is not none: stream.stop() stream.close()
关键词部分,为了识别准确以及出现谐音内容可以增添多个关键词使用,作为分割
字典路径如果出现中文字符有可能会报错!
代码意义不大,如果考虑深入:可以尝试增加快捷键,以及相关应用接口可以更好控制
上班族打开电脑i第一件事情是启动相关应用,同样可以尝试多应用编组启动
到此这篇关于python实现语音启动电脑应用程序的文章就介绍到这了,更多相关python语音启动电脑内容请搜索代码网以前的文章或继续浏览下面的相关文章希望大家以后多多支持代码网!
您想发表意见!!点此发布评论
版权声明:本文内容由互联网用户贡献,该文观点仅代表作者本人。本站仅提供信息存储服务,不拥有所有权,不承担相关法律责任。 如发现本站有涉嫌抄袭侵权/违法违规的内容, 请发送邮件至 2386932994@qq.com 举报,一经查实将立刻删除。
发表评论