|
|
@@ -0,0 +1,70 @@
|
|
|
+
|
|
|
+"""
|
|
|
+azure voice service
|
|
|
+"""
|
|
|
+import json
|
|
|
+import os
|
|
|
+import time
|
|
|
+import azure.cognitiveservices.speech as speechsdk
|
|
|
+from aip import AipSpeech
|
|
|
+from bridge.reply import Reply, ReplyType
|
|
|
+from common.log import logger
|
|
|
+from common.tmp_dir import TmpDir
|
|
|
+from voice.voice import Voice
|
|
|
+from voice.audio_convert import get_pcm_from_wav
|
|
|
+from config import conf
|
|
|
+"""
|
|
|
+Azure voice
|
|
|
+主目录设置文件中需填写azure_voice_api_key和azure_voice_region
|
|
|
+
|
|
|
+查看可用的 voice: https://speech.microsoft.com/portal/voicegallery
|
|
|
+
|
|
|
+"""
|
|
|
+
|
|
|
class AzureVoice(Voice):
    """Speech-to-text and text-to-speech backed by the Azure Speech SDK.

    The subscription key and region are read from the main project
    configuration (``azure_voice_api_key`` and ``azure_voice_region``).
    The synthesis voice and recognition language are read from a
    ``config.json`` file located next to this module; the file is created
    with default values when it does not exist.

    Available voices: https://speech.microsoft.com/portal/voicegallery
    """

    # Values used when config.json is missing or lacks a key.
    _DEFAULT_CONFIG = {
        "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural",
        "speech_recognition_language": "zh-CN",
    }

    def __init__(self):
        """Build the shared ``SpeechConfig``.

        Initialisation is best-effort: any failure is logged and leaves
        ``self.speech_config`` set to ``None`` so that later calls return an
        error reply instead of raising ``AttributeError``.
        """
        self.speech_config = None
        try:
            curdir = os.path.dirname(__file__)
            config_path = os.path.join(curdir, "config.json")
            if not os.path.exists(config_path):
                # No local config file yet: create one with the defaults.
                config = dict(self._DEFAULT_CONFIG)
                with open(config_path, "w", encoding="utf-8") as fw:
                    json.dump(config, fw, indent=4)
            else:
                with open(config_path, "r", encoding="utf-8") as fr:
                    # Fill in defaults for keys a hand-edited file may lack.
                    config = {**self._DEFAULT_CONFIG, **json.load(fr)}
            self.api_key = conf().get('azure_voice_api_key')
            self.api_region = conf().get('azure_voice_region')
            speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region)
            speech_config.speech_synthesis_voice_name = config["speech_synthesis_voice_name"]
            speech_config.speech_recognition_language = config["speech_recognition_language"]
            # textToVoice writes to a ".mp3" path, so request MP3 explicitly;
            # per the Azure Speech SDK documentation the default synthesis
            # output is RIFF/WAV, which would not match the file extension.
            speech_config.set_speech_synthesis_output_format(
                speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
            # Publish only a fully configured object.
            self.speech_config = speech_config
        except Exception as e:
            logger.warning("AzureVoice init failed: %s, ignore " % e)

    def voiceToText(self, voice_file):
        """Recognise the speech contained in an audio file.

        :param voice_file: path of the audio file handed to the Azure SDK.
        :return: ``Reply(ReplyType.TEXT, text)`` when speech was recognised,
            otherwise ``Reply(ReplyType.ERROR, ...)``.
        """
        if self.speech_config is None:
            # __init__ failed earlier; report an error rather than crash.
            logger.error('[Azure] voiceToText error, speech config is not initialized')
            return Reply(ReplyType.ERROR, "抱歉,语音识别失败")
        audio_config = speechsdk.AudioConfig(filename=voice_file)
        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=self.speech_config, audio_config=audio_config)
        result = speech_recognizer.recognize_once()
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            logger.info('[Azure] voiceToText voice file name={} text={}'.format(voice_file, result.text))
            reply = Reply(ReplyType.TEXT, result.text)
        else:
            logger.error('[Azure] voiceToText error, result={}'.format(result))
            reply = Reply(ReplyType.ERROR, "抱歉,语音识别失败")
        return reply

    def textToVoice(self, text):
        """Synthesise ``text`` into an audio file in the temporary directory.

        :param text: the text to speak.
        :return: ``Reply(ReplyType.VOICE, file_path)`` when synthesis
            completed, otherwise ``Reply(ReplyType.ERROR, ...)``.
        """
        if self.speech_config is None:
            # __init__ failed earlier; report an error rather than crash.
            logger.error('[Azure] textToVoice error, speech config is not initialized')
            return Reply(ReplyType.ERROR, "抱歉,语音合成失败")
        # The file name carries a second-resolution timestamp.
        fileName = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
        audio_config = speechsdk.AudioConfig(filename=fileName)
        speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)
        result = speech_synthesizer.speak_text(text)
        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            logger.info(
                '[Azure] textToVoice text={} voice file name={}'.format(text, fileName))
            reply = Reply(ReplyType.VOICE, fileName)
        else:
            logger.error('[Azure] textToVoice error, result={}'.format(result))
            reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
        return reply
|