3 rokov pred · 1545a9f262
--- a/config.py
+++ b/config.py
@@ -47,16 +47,20 @@ available_setting = {
 
				     "speech_recognition": False,  # 是否开启语音识别
			
 
				     "group_speech_recognition": False,  # 是否开启群组语音识别
			
 
				     "voice_reply_voice": False,  # 是否使用语音回复语音，需要设置对应语音合成引擎的api key
			
 
				-    "voice_to_text": "openai",  # 语音识别引擎，支持openai,google
			
 
				-    "text_to_voice": "baidu",  # 语音合成引擎，支持baidu,google,pytts(offline)
			
 
				+    "voice_to_text": "openai",  # 语音识别引擎，支持openai,google,azure
			
 
				+    "text_to_voice": "baidu",  # 语音合成引擎，支持baidu,google,pytts(offline),azure
			
 
				 
			
 
				-    # baidu api的配置， 使用百度语音识别和语音合成时需要
			
 
				+    # baidu 语音api配置， 使用百度语音识别和语音合成时需要
			
 
				     "baidu_app_id": "",
			
 
				     "baidu_api_key": "",
			
 
				     "baidu_secret_key": "",
			
 
				     # 1536普通话(支持简单的英文识别) 1737英语 1637粤语 1837四川话 1936普通话远场
			
 
				     "baidu_dev_pid": "1536",
			
 
				 
			
 
				+    # azure 语音api配置， 使用azure语音识别和语音合成时需要
			
 
				+    "azure_voice_api_key": "",
			
 
				+    "azure_voice_region": "japaneast",
			
 
				+
			
 
				     # 服务时间限制，目前支持itchat
			
 
				     "chat_time_module": False,  # 是否开启服务时间限制
			
 
				     "chat_start_time": "00:00",  # 服务开始时间
			
--- a/voice/azure/azure_voice.py
+++ b/voice/azure/azure_voice.py
@@ -0,0 +1,70 @@
 
				+
			
 
				+"""
			
 
				+azure voice service
			
 
				+"""
			
 
				+import json
			
 
				+import os
			
 
				+import time
			
 
				+import azure.cognitiveservices.speech as speechsdk
			
 
				+from aip import AipSpeech
			
 
				+from bridge.reply import Reply, ReplyType
			
 
				+from common.log import logger
			
 
				+from common.tmp_dir import TmpDir
			
 
				+from voice.voice import Voice
			
 
				+from voice.audio_convert import get_pcm_from_wav
			
 
				+from config import conf
			
 
				+"""
			
 
				+Azure voice
			
 
				+主目录设置文件中需填写azure_voice_api_key和azure_voice_region
			
 
				+
			
 
				+查看可用的 voice： https://speech.microsoft.com/portal/voicegallery
			
 
				+
			
 
				+"""
			
 
				+
			
 
				+class AzureVoice(Voice):
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        try:
			
 
				+            curdir = os.path.dirname(__file__)
			
 
				+            config_path = os.path.join(curdir, "config.json")
			
 
				+            config = None
			
 
				+            if not os.path.exists(config_path): #如果没有配置文件，创建本地配置文件
			
 
				+                config = { "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", "speech_recognition_language": "zh-CN"}
			
 
				+                with open(config_path, "w") as fw:
			
 
				+                    json.dump(config, fw, indent=4)
			
 
				+            else:
			
 
				+                with open(config_path, "r") as fr:
			
 
				+                    config = json.load(fr)
			
 
				+            self.api_key = conf().get('azure_voice_api_key')
			
 
				+            self.api_region = conf().get('azure_voice_region')
			
 
				+            self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region)
			
 
				+            self.speech_config.speech_synthesis_voice_name = config["speech_synthesis_voice_name"]
			
 
				+            self.speech_config.speech_recognition_language = config["speech_recognition_language"]
			
 
				+        except Exception as e:
			
 
				+            logger.warn("AzureVoice init failed: %s, ignore " % e)
			
 
				+
			
 
				+    def voiceToText(self, voice_file):
			
 
				+        audio_config = speechsdk.AudioConfig(filename=voice_file)
			
 
				+        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=self.speech_config, audio_config=audio_config)
			
 
				+        result = speech_recognizer.recognize_once()
			
 
				+        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
			
 
				+            logger.info('[Azure] voiceToText voice file name={} text={}'.format(voice_file, result.text))
			
 
				+            reply = Reply(ReplyType.TEXT, result.text)
			
 
				+        else:
			
 
				+            logger.error('[Azure] voiceToText error, result={}'.format(result))
			
 
				+            reply = Reply(ReplyType.ERROR, "抱歉，语音识别失败")
			
 
				+        return reply
			
 
				+
			
 
				+    def textToVoice(self, text):
			
 
				+        fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
			
 
				+        audio_config = speechsdk.AudioConfig(filename=fileName)
			
 
				+        speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)
			
 
				+        result = speech_synthesizer.speak_text(text)
			
 
				+        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
			
 
				+            logger.info(
			
 
				+                '[Azure] textToVoice text={} voice file name={}'.format(text, fileName))
			
 
				+            reply = Reply(ReplyType.VOICE, fileName)
			
 
				+        else:
			
 
				+            logger.error('[Azure] textToVoice error, result={}'.format(result))
			
 
				+            reply = Reply(ReplyType.ERROR, "抱歉，语音合成失败")
			
 
				+        return reply
			
--- a/voice/azure/config.json.template
+++ b/voice/azure/config.json.template
@@ -0,0 +1,4 @@
 
				+{
			
 
				+    "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
			
 
				+    "speech_recognition_language": "zh-CN"
			
 
				+}
			
--- a/voice/voice_factory.py
+++ b/voice/voice_factory.py
@@ -20,4 +20,7 @@ def create_voice(voice_type):
 
				     elif voice_type == 'pytts':
			
 
				         from voice.pytts.pytts_voice import PyttsVoice
			
 
				         return PyttsVoice()
			
 
				+    elif voice_type == 'azure':
			
 
				+        from voice.azure.azure_voice import AzureVoice
			
 
				+        return AzureVoice()
			
 
				     raise RuntimeError